Initial JPEG-LS FPGA encoder baseline with tooling and timeout fix
This commit is contained in:
289
fpga/verilog/jls_bit_packer.sv
Normal file
289
fpga/verilog/jls_bit_packer.sv
Normal file
@@ -0,0 +1,289 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.1-C.4 entropy-coded segment syntax
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : JPEG-LS bitstream packing and marker/zero-bit stuffing
|
||||
// Trace : docs/jls_traceability.md#bit-packing-and-stuffing
|
||||
// Example : Data bits 0xFF followed by seven 1 bits produce bytes FF 7F.
|
||||
//
|
||||
// Bit packer for JPEG-LS scan payload bytes. The input code word is left
|
||||
// aligned: the first bit to write is code_bits[MAX_CODE_BITS-1]. After a data
|
||||
// byte equal to 0xFF is emitted, the packer inserts one stuffed zero bit before
|
||||
// the next data bit as required by JPEG-LS marker/zero-bit stuffing.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_bit_packer #(
  // Widest code word accepted in a single code event. The upstream encoder
  // must split longer Golomb codes into several ordered events.
  parameter int MAX_CODE_BITS = 64
) (
  // Main 250 MHz clock.
  input  var logic                     clk,
  // Synchronous active-high reset.
  input  var logic                     rst,

  // Code event handshake. code_bits is left aligned: bit MAX_CODE_BITS-1 is
  // written first and code_bit_count gives the number of meaningful bits.
  input  var logic                     code_valid,
  output logic                         code_ready,
  input  var logic [MAX_CODE_BITS-1:0] code_bits,
  input  var logic [6:0]               code_bit_count,

  // Flush handshake used before EOI. The current byte is padded with zeros
  // and flush_done pulses for one cycle once nothing is left to emit.
  input  var logic                     flush_valid,
  output logic                         flush_ready,
  output logic                         flush_done,

  // Packed scan payload byte stream towards the downstream byte buffer.
  output logic                         byte_valid,
  input  var logic                     byte_ready,
  output logic [7:0]                   byte_data
);

  // ---------------------------------------------------------------------
  // Registered state
  // ---------------------------------------------------------------------
  // Pending code bits (left aligned) and how many of them are still unpacked.
  logic [MAX_CODE_BITS-1:0] code_shift_reg;
  logic [6:0]               bits_remaining;

  // Byte under construction, filled from the MSB down, and its fill level.
  logic [7:0]               partial_byte;
  logic [3:0]               partial_count;

  // Set after a 0xFF byte was emitted: the next bit written must be a zero.
  logic                     stuff_zero_pending;

  // Set from flush acceptance until padding (and any stuffed bit) is done.
  logic                     flush_active;

  // ---------------------------------------------------------------------
  // Handshake terms
  // ---------------------------------------------------------------------
  logic output_slot_open;   // output register is empty or being drained
  logic code_event_ready;   // idle enough to take a new code or flush
  logic accept_code;        // non-empty code event taken this cycle
  logic accept_flush;       // flush request taken this cycle
  logic pack_step_active;   // one packing step executes this cycle

  assign output_slot_open = byte_ready || !byte_valid;

  assign code_event_ready = output_slot_open &&
                            (bits_remaining == 7'd0) &&
                            !flush_active;

  assign code_ready  = code_event_ready;

  // A flush is only offered while no code event is being presented.
  assign flush_ready = code_event_ready && !code_valid;

  // Zero-length code events complete the handshake but load nothing.
  assign accept_code  = code_valid && code_ready && (code_bit_count != 7'd0);
  assign accept_flush = flush_valid && flush_ready;

  assign pack_step_active = output_slot_open &&
                            (stuff_zero_pending ||
                             flush_active ||
                             (bits_remaining != 7'd0));

  // ---------------------------------------------------------------------
  // One packing step (combinational next-state computation)
  // ---------------------------------------------------------------------
  logic [MAX_CODE_BITS-1:0] next_shift;
  logic [6:0]               next_remaining;
  logic [7:0]               next_byte;
  logic [3:0]               next_count;
  logic                     next_stuff;
  logic                     next_flush;
  logic                     step_emit;
  logic [7:0]               step_data;
  logic                     step_flush_done;

  logic [3:0]               room;     // free bit positions in partial_byte
  logic [3:0]               take;     // code bits consumed by this step
  logic [7:0]               mask;     // keeps the top `take` bits of a byte
  logic [7:0]               merged;   // partial_byte with the new bits added
  logic [3:0]               filled;   // fill level after adding `take` bits
  logic [3:0]               bumped;   // fill level after one stuffed zero

  always_comb begin
    // Hold everything unless a branch below says otherwise.
    next_shift      = code_shift_reg;
    next_remaining  = bits_remaining;
    next_byte       = partial_byte;
    next_count      = partial_count;
    next_stuff      = stuff_zero_pending;
    next_flush      = flush_active;
    step_emit       = 1'b0;
    step_data       = 8'h00;
    step_flush_done = 1'b0;

    // Take as many bits as fit in the current byte, limited by what is left.
    room = 4'd8 - partial_count;
    if (bits_remaining == 7'd0) begin
      take = 4'd0;
    end else if (bits_remaining >= {3'd0, room}) begin
      take = room;
    end else begin
      take = bits_remaining[3:0];
    end

    // Top-aligned mask of `take` ones; counts above 8 select nothing.
    mask   = (take > 4'd8) ? 8'h00 : ~(8'hFF >> take);
    merged = partial_byte |
             ((code_shift_reg[MAX_CODE_BITS-1 -: 8] & mask) >> partial_count[2:0]);
    filled = partial_count + take;
    bumped = partial_count + 4'd1;

    if (pack_step_active) begin
      if (stuff_zero_pending) begin
        // Spend this step on the stuffed zero bit; no code bits are consumed.
        next_stuff = 1'b0;
        if (bumped == 4'd8) begin
          step_emit  = 1'b1;
          step_data  = partial_byte;
          next_byte  = 8'h00;
          next_count = 4'd0;
        end else begin
          next_count = bumped;
        end
      end else if (bits_remaining != 7'd0) begin
        // Move `take` bits from the shift register into the byte.
        next_shift     = code_shift_reg << take;
        next_remaining = bits_remaining - {3'd0, take};
        if (filled == 4'd8) begin
          step_emit = 1'b1;
          step_data = merged;
          if (merged == 8'hFF) begin
            next_stuff = 1'b1;
          end
          next_byte  = 8'h00;
          next_count = 4'd0;
        end else begin
          next_byte  = merged;
          next_count = filled;
        end
      end else if (flush_active) begin
        if (partial_count != 4'd0) begin
          // Emit the partly filled byte; unused low bits are already zero.
          step_emit = 1'b1;
          step_data = partial_byte;
          if (partial_byte == 8'hFF) begin
            next_stuff = 1'b1;
          end
          next_byte  = 8'h00;
          next_count = 4'd0;
        end else begin
          // Nothing pending and no stuffed bit owed: the flush is complete.
          next_flush      = 1'b0;
          step_flush_done = 1'b1;
        end
      end
    end
  end

  // ---------------------------------------------------------------------
  // State update
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin
    if (rst) begin
      code_shift_reg     <= {MAX_CODE_BITS{1'b0}};
      bits_remaining     <= 7'd0;
      partial_byte       <= 8'h00;
      partial_count      <= 4'd0;
      stuff_zero_pending <= 1'b0;
      flush_active       <= 1'b0;
      flush_done         <= 1'b0;
      byte_valid         <= 1'b0;
      byte_data          <= 8'h00;
    end else begin
      // step_flush_done is only ever set during an active step, so this is a
      // single-cycle pulse.
      flush_done <= step_flush_done;

      // The output register reloads whenever it is empty or being drained.
      if (output_slot_open) begin
        byte_valid <= step_emit;
        byte_data  <= step_data;
      end

      if (pack_step_active) begin
        code_shift_reg     <= next_shift;
        bits_remaining     <= next_remaining;
        partial_byte       <= next_byte;
        partial_count      <= next_count;
        stuff_zero_pending <= next_stuff;
        flush_active       <= next_flush;
      end

      // Acceptance comes last so it overrides the step results for the
      // registers it loads.
      if (accept_code) begin
        code_shift_reg <= code_bits;
        bits_remaining <= code_bit_count;
      end

      if (accept_flush) begin
        flush_active <= 1'b1;
      end
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
104
fpga/verilog/jls_byte_arbiter.sv
Normal file
104
fpga/verilog/jls_byte_arbiter.sv
Normal file
@@ -0,0 +1,104 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.1-C.4 marker stream byte order
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Marker bytes and entropy-coded bytes in stream order
|
||||
// Trace : docs/jls_traceability.md#jls-header-markers
|
||||
// Example : Header byte FF is forwarded before a waiting payload byte.
|
||||
//
|
||||
// Two-input byte-stream arbiter. Header/EOI bytes have priority over payload
|
||||
// bytes so a strip frame is emitted as SOI/SOF/LSE/SOS, payload, and then EOI.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_byte_arbiter (
  // Header/EOI byte stream from jls_header_writer, with its
  // original-image-start sideband bit.
  input  var logic       header_valid,
  output logic           header_ready,
  input  var logic [7:0] header_data,
  input  var logic       header_original_image_start,

  // Scan payload byte stream from jls_bit_packer. Payload bytes never carry
  // the original-image-start sideband.
  input  var logic       payload_valid,
  output logic           payload_ready,
  input  var logic [7:0] payload_data,

  // Arbitrated byte event towards jls_output_buffer.
  output logic           byte_valid,
  input  var logic       byte_ready,
  output logic [7:0]     byte_data,
  output logic           original_image_start
);

  // Fixed priority: a valid header byte always wins; the payload is selected
  // only while the header stream is idle.
  logic select_header;
  logic select_payload;

  assign select_header  = header_valid;
  assign select_payload = payload_valid && !header_valid;

  // Something is offered downstream whenever either source is selected.
  assign byte_valid = select_header || select_payload;

  // Output mux. With no source selected both outputs are driven to zero.
  always_comb begin
    if (select_header) begin
      byte_data            = header_data;
      original_image_start = header_original_image_start;
    end else if (select_payload) begin
      byte_data            = payload_data;
      original_image_start = 1'b0;
    end else begin
      byte_data            = 8'h00;
      original_image_start = 1'b0;
    end
  end

  // Downstream ready is routed back only to the selected source.
  assign header_ready  = select_header  && byte_ready;
  assign payload_ready = select_payload && byte_ready;

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
251
fpga/verilog/jls_coding_params.sv
Normal file
251
fpga/verilog/jls_coding_params.sv
Normal file
@@ -0,0 +1,251 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.2 initialization, Annex G.2 variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : RANGE, qbpp, and LIMIT derivation from MAXVAL and NEAR
|
||||
// Trace : docs/jls_traceability.md#jls-coding-parameters
|
||||
// Example : PIX_WIDTH=8,NEAR=0 gives RANGE=256,qbpp=8,LIMIT=32.
|
||||
//
|
||||
// JPEG-LS coding parameter lookup. RANGE and qbpp depend on NEAR, but NEAR is
|
||||
// limited to 0..31 in this project. A lookup table avoids a synthesized
|
||||
// runtime divider and keeps this strip-level control path timing friendly.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_coding_params #(
  // Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
  // Any other value selects the 16-bit parameter set.
  parameter int PIX_WIDTH = 16
) (
  // JPEG-LS NEAR parameter for the current strip frame. Values above 31 are
  // treated as 31.
  input  var logic [5:0] NEAR,

  // JPEG-LS RANGE parameter.
  output logic [16:0]    RANGE,

  // JPEG-LS quantized bits per sample, ceil(log2(RANGE)).
  output logic [4:0]     qbpp,

  // JPEG-LS LIMIT parameter used by regular-mode Golomb coding.
  output logic [6:0]     LIMIT
);

  // Precision actually used for the tables (16 for every non-listed width).
  localparam int SAMPLE_BITS =
      ((PIX_WIDTH == 8)  || (PIX_WIDTH == 10) ||
       (PIX_WIDTH == 12) || (PIX_WIDTH == 14)) ? PIX_WIDTH : 16;

  // MAXVAL = 2^bpp - 1 for the selected precision.
  localparam int MAXVAL = (1 << SAMPLE_BITS) - 1;

  // LIMIT = 2 * (bpp + max(8, bpp)), which is 4 * bpp because bpp >= 8 here.
  localparam int LIMIT_VALUE = 4 * SAMPLE_BITS;

  // Per-NEAR lookup tables. Every entry is an elaboration-time constant, so
  // no divider is built; only the 32:1 selection below is real logic.
  logic [16:0] range_lut [0:31];
  logic [4:0]  qbpp_lut  [0:31];

  for (genvar n = 0; n < 32; n++) begin : g_near_entry
    // RANGE = floor((MAXVAL + 2*NEAR) / (2*NEAR + 1)) + 1
    localparam int RANGE_N = (MAXVAL + 2 * n) / (2 * n + 1) + 1;

    assign range_lut[n] = 17'(RANGE_N);
    assign qbpp_lut[n]  = 5'($clog2(RANGE_N));
  end

  // Defensive clamp even though upstream NEAR is already limited to 31.
  logic [4:0] near_index;

  assign near_index = (NEAR > 6'd31) ? 5'd31 : NEAR[4:0];

  assign RANGE = range_lut[near_index];
  assign qbpp  = qbpp_lut[near_index];
  assign LIMIT = 7'(LIMIT_VALUE);

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
51
fpga/verilog/jls_common_pkg.sv
Normal file
51
fpga/verilog/jls_common_pkg.sv
Normal file
@@ -0,0 +1,51 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.1, Annex C.1-C.4, Annex D.3
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Shared engineering constants for JPEG-LS strip-frame encoder
|
||||
// Example : See docs/jls_module_interfaces.md
|
||||
//
|
||||
// Shared package for the JPEG-LS RTL encoder. Keep this file limited to
|
||||
// simple constants and small type declarations; algorithmic logic belongs in
|
||||
// pipelined modules, not in package functions.
|
||||
|
||||
package jls_common_pkg;

  // ---------------------------------------------------------------------
  // Port and bus widths
  // ---------------------------------------------------------------------
  // Width of cfg_pic_col/cfg_pic_row and of image coordinates.
  localparam int JLS_DIM_WIDTH   = 13;
  // Width of the runtime ratio input port.
  localparam int JLS_RATIO_WIDTH = 4;
  // Output FIFO word: one data byte plus the original-image-start sideband bit.
  localparam int JLS_OFIFO_WIDTH = 8 + 1;
  // Width used to carry NEAR; this IP clamps NEAR to 0..31.
  localparam int JLS_NEAR_WIDTH  = 6;

  // ---------------------------------------------------------------------
  // Marker bytes
  // ---------------------------------------------------------------------
  // First byte of every SOI/SOF55/LSE/SOS/EOI marker.
  localparam logic [7:0] JLS_MARKER_PREFIX = 8'hFF;

  // Second byte of each marker emitted by the header writer.
  localparam logic [7:0] JLS_MARKER_SOI    = 8'hD8;
  localparam logic [7:0] JLS_MARKER_EOI    = 8'hD9;
  localparam logic [7:0] JLS_MARKER_SOF55  = 8'hF7;
  localparam logic [7:0] JLS_MARKER_LSE    = 8'hF8;
  localparam logic [7:0] JLS_MARKER_SOS    = 8'hDA;

  // ---------------------------------------------------------------------
  // Enumerations
  // ---------------------------------------------------------------------
  // Encodings of the runtime compression-ratio port defined by the SRS.
  typedef enum logic [JLS_RATIO_WIDTH-1:0] {
    JLS_RATIO_LOSSLESS = 4'd0,
    JLS_RATIO_1_TO_2   = 4'd1,
    JLS_RATIO_1_TO_4   = 4'd2,
    JLS_RATIO_1_TO_8   = 4'd3
  } jls_ratio_e;

  // Strip-frame level control events.
  typedef enum logic [1:0] {
    JLS_STRIP_EVENT_NONE   = 2'd0,
    JLS_STRIP_EVENT_START  = 2'd1,
    JLS_STRIP_EVENT_FINISH = 2'd2
  } jls_strip_event_e;

endpackage
|
||||
215
fpga/verilog/jls_context_memory.sv
Normal file
215
fpga/verilog/jls_context_memory.sv
Normal file
@@ -0,0 +1,215 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.2 initialization, Annex A.6 variables update
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Regular-mode context arrays A[0..364], B[0..364], C[0..364], N[0..364]
|
||||
// Trace : docs/jls_traceability.md#context-update
|
||||
// Example : RANGE=256 initializes A to max(2,(RANGE+32)/64)=4.
|
||||
//
|
||||
// Regular context storage. This module uses lazy strip initialization: strip
|
||||
// start clears a written-bit vector and latches the Annex A.2 default A value.
|
||||
// A later read of an unwritten context returns the default A/B/C/N tuple, while
|
||||
// a written context returns the RAM value. This is equivalent to writing all
|
||||
// 365 contexts at strip start, but avoids a long boundary stall.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_context_memory #(
|
||||
// Number of regular-mode contexts, indexed by abs((Q1*9+Q2)*9+Q3).
|
||||
parameter int CONTEXT_COUNT = 365
|
||||
) (
|
||||
// Main 250 MHz clock.
|
||||
input var logic clk,
|
||||
|
||||
// Synchronous active-high reset.
|
||||
input var logic rst,
|
||||
|
||||
// Start lazy initialization for a new standalone strip frame.
|
||||
input var logic init_valid,
|
||||
|
||||
// Initialization command can be accepted.
|
||||
output logic init_ready,
|
||||
|
||||
// JPEG-LS RANGE parameter used to initialize A[Q].
|
||||
input var logic [16:0] init_RANGE,
|
||||
|
||||
// High while a multi-cycle initializer is active. The lazy path keeps this
|
||||
// low because the reset happens in the command-accept cycle.
|
||||
output logic init_busy,
|
||||
|
||||
// One-cycle pulse after lazy initialization has been applied.
|
||||
output logic init_done,
|
||||
|
||||
// Registered read request.
|
||||
input var logic read_valid,
|
||||
|
||||
// Read request can be accepted.
|
||||
output logic read_ready,
|
||||
|
||||
// Context index to read.
|
||||
input var logic [8:0] read_context_index,
|
||||
|
||||
// Read result is valid.
|
||||
output logic read_result_valid,
|
||||
|
||||
// Downstream stage accepted the read result.
|
||||
input var logic read_result_ready,
|
||||
|
||||
// Read context index and variables.
|
||||
output logic [8:0] read_result_context_index,
|
||||
output logic [31:0] read_A,
|
||||
output logic signed [31:0] read_B,
|
||||
output logic signed [8:0] read_C,
|
||||
output logic [15:0] read_N,
|
||||
|
||||
// Writeback request after context update arithmetic.
|
||||
input var logic write_valid,
|
||||
|
||||
// Writeback can be accepted.
|
||||
output logic write_ready,
|
||||
|
||||
// Context index and updated variables to write.
|
||||
input var logic [8:0] write_context_index,
|
||||
input var logic [31:0] write_A,
|
||||
input var logic signed [31:0] write_B,
|
||||
input var logic signed [8:0] write_C,
|
||||
input var logic [15:0] write_N
|
||||
);
|
||||
|
||||
// Context variable memories.
|
||||
logic [31:0] A_mem [0:CONTEXT_COUNT-1];
|
||||
logic signed [31:0] B_mem [0:CONTEXT_COUNT-1];
|
||||
logic signed [8:0] C_mem [0:CONTEXT_COUNT-1];
|
||||
logic [15:0] N_mem [0:CONTEXT_COUNT-1];
|
||||
|
||||
// Lazy initialization state. context_written[Q]=0 means context Q still has
|
||||
// the current strip's default A/B/C/N values.
|
||||
logic [CONTEXT_COUNT-1:0] context_written;
|
||||
logic [31:0] init_A_value;
|
||||
logic [31:0] init_A_latched;
|
||||
logic [31:0] init_A_candidate;
|
||||
logic init_command_accepted;
|
||||
|
||||
// Read/write handshake terms.
|
||||
logic read_slot_open;
|
||||
logic read_accepted;
|
||||
logic write_accepted;
|
||||
|
||||
always_comb begin
|
||||
init_A_candidate = {15'd0, init_RANGE} + 32'd32;
|
||||
init_A_candidate = init_A_candidate >> 6;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
init_A_value = init_A_candidate;
|
||||
if (init_A_candidate < 32'd2) begin
|
||||
init_A_value = 32'd2;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
init_ready = 1'b0;
|
||||
if (!read_result_valid && !write_valid) begin
|
||||
init_ready = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
init_command_accepted = 1'b0;
|
||||
if (init_valid && init_ready) begin
|
||||
init_command_accepted = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
read_slot_open = 1'b0;
|
||||
if (!read_result_valid || read_result_ready) begin
|
||||
read_slot_open = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
read_ready = 1'b0;
|
||||
if (!init_command_accepted && read_slot_open) begin
|
||||
read_ready = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
read_accepted = 1'b0;
|
||||
if (read_valid && read_ready) begin
|
||||
read_accepted = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
write_ready = 1'b0;
|
||||
if (!init_command_accepted) begin
|
||||
write_ready = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
write_accepted = 1'b0;
|
||||
if (write_valid && write_ready) begin
|
||||
write_accepted = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
init_busy = 1'b0;
|
||||
end
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst) begin
|
||||
init_A_latched <= 32'd0;
|
||||
context_written <= {CONTEXT_COUNT{1'b0}};
|
||||
init_done <= 1'b0;
|
||||
read_result_valid <= 1'b0;
|
||||
read_result_context_index <= 9'd0;
|
||||
read_A <= 32'd0;
|
||||
read_B <= 32'sd0;
|
||||
read_C <= 9'sd0;
|
||||
read_N <= 16'd0;
|
||||
end else begin
|
||||
init_done <= 1'b0;
|
||||
|
||||
if (read_result_valid && read_result_ready && !read_accepted) begin
|
||||
read_result_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (init_command_accepted) begin
|
||||
init_A_latched <= init_A_value;
|
||||
context_written <= {CONTEXT_COUNT{1'b0}};
|
||||
init_done <= 1'b1;
|
||||
end
|
||||
|
||||
if (write_accepted) begin
|
||||
A_mem[write_context_index] <= write_A;
|
||||
B_mem[write_context_index] <= write_B;
|
||||
C_mem[write_context_index] <= write_C;
|
||||
N_mem[write_context_index] <= write_N;
|
||||
context_written[write_context_index] <= 1'b1;
|
||||
end
|
||||
|
||||
if (read_accepted) begin
|
||||
read_result_valid <= 1'b1;
|
||||
read_result_context_index <= read_context_index;
|
||||
if (context_written[read_context_index]) begin
|
||||
read_A <= A_mem[read_context_index];
|
||||
read_B <= B_mem[read_context_index];
|
||||
read_C <= C_mem[read_context_index];
|
||||
read_N <= N_mem[read_context_index];
|
||||
end else begin
|
||||
read_A <= init_A_latched;
|
||||
read_B <= 32'sd0;
|
||||
read_C <= 9'sd0;
|
||||
read_N <= 16'd1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
`default_nettype wire
|
||||
1027
fpga/verilog/jls_context_model.sv
Normal file
1027
fpga/verilog/jls_context_model.sv
Normal file
File diff suppressed because it is too large
Load Diff
698
fpga/verilog/jls_context_quantizer.sv
Normal file
698
fpga/verilog/jls_context_quantizer.sv
Normal file
@@ -0,0 +1,698 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.3 context determination, Annex G.1 variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Quantize D1/D2/D3 into Q1/Q2/Q3 and compute context ID
|
||||
// Trace : docs/jls_traceability.md#context-update
|
||||
// Example : D1=22,D2=8,D3=2 with T1=3,T2=7,T3=21,NEAR=0 gives Q=4,3,1.
|
||||
//
|
||||
// Context quantization stage. It does not update A/B/C/N; it only computes the
|
||||
// regular-mode context selector from reconstructed neighbors and forwards the
|
||||
// predictor event to the later context memory/update pipeline.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
// Combinational quantizer for one JPEG-LS local gradient (Annex A.3).
// Maps the signed gradient Di onto one of nine regions, numbered -4..4, using
// the thresholds T1/T2/T3 and the NEAR tolerance of the current strip.
// Example: with T1=3, T2=7, T3=21, NEAR=0 the gradients 22, 8 and 2 quantize
// to 4, 3 and 1 respectively.
module jls_gradient_quantize_one #(
  // Signed gradient width. For PIX_WIDTH=16 this is 17 bits.
  parameter int DI_WIDTH = 17
) (
  // Local gradient Di from the JPEG-LS standard.
  input var logic signed [DI_WIDTH-1:0] Di,

  // JPEG-LS threshold T1 for the current NEAR.
  input var logic [15:0] T1,

  // JPEG-LS threshold T2 for the current NEAR.
  input var logic [15:0] T2,

  // JPEG-LS threshold T3 for the current NEAR.
  input var logic [15:0] T3,

  // JPEG-LS NEAR parameter for the current strip frame.
  input var logic [5:0] NEAR,

  // Quantized gradient Qi in the range -4..4.
  output logic signed [3:0] Qi
);

  // Number of zero bits placed above the 6-bit NEAR value so that it fills
  // the signed compare width.
  localparam int NEAR_ZERO_FILL = DI_WIDTH - 6;

  // Non-negative region bounds, widened to the signed gradient width. Only
  // the low DI_WIDTH-1 bits of each threshold are kept; a zero sign bit is
  // placed on top so the bound is always read as a non-negative value.
  logic signed [DI_WIDTH-1:0] upper_t1;
  logic signed [DI_WIDTH-1:0] upper_t2;
  logic signed [DI_WIDTH-1:0] upper_t3;
  logic signed [DI_WIDTH-1:0] upper_near;

  // Mirrored (negated) bounds for the negative half of the gradient axis.
  logic signed [DI_WIDTH-1:0] lower_t1;
  logic signed [DI_WIDTH-1:0] lower_t2;
  logic signed [DI_WIDTH-1:0] lower_t3;
  logic signed [DI_WIDTH-1:0] lower_near;

  always_comb begin
    upper_t1   = $signed({1'b0, T1[DI_WIDTH-2:0]});
    upper_t2   = $signed({1'b0, T2[DI_WIDTH-2:0]});
    upper_t3   = $signed({1'b0, T3[DI_WIDTH-2:0]});
    upper_near = $signed({{NEAR_ZERO_FILL{1'b0}}, NEAR});

    lower_t1   = -upper_t1;
    lower_t2   = -upper_t2;
    lower_t3   = -upper_t3;
    lower_near = -upper_near;
  end

  // Region ladder, walked from the most negative region upwards. The first
  // bound that Di satisfies selects the region; anything at or above T3 lands
  // in region 4.
  always_comb begin
    if (Di <= lower_t3) begin
      Qi = -4'sd4;
    end else if (Di <= lower_t2) begin
      Qi = -4'sd3;
    end else if (Di <= lower_t1) begin
      Qi = -4'sd2;
    end else if (Di < lower_near) begin
      Qi = -4'sd1;
    end else if (Di <= upper_near) begin
      Qi = 4'sd0;
    end else if (Di < upper_t1) begin
      Qi = 4'sd1;
    end else if (Di < upper_t2) begin
      Qi = 4'sd2;
    end else if (Di < upper_t3) begin
      Qi = 4'sd3;
    end else begin
      Qi = 4'sd4;
    end
  end

endmodule
|
||||
|
||||
// JPEG-LS context quantization pipeline (Annex A.3).
//
// Data flow, one register rank per arrow:
//   predictor event -> gradient stage -> quantized stage -> context output
// The gradient stage and the context output each own a one-entry skid slot.
// predict_ready is derived only from the input skid slot, and the quantized
// stage drains based only on the output skid slot, so context_ready does not
// reach the enables of the earlier ranks.
//
// This module never modifies A/B/C/N. It forwards the pixel event together
// with Q1/Q2/Q3, the absolute context index, the context sign and the
// run-mode flag.
module jls_context_quantizer #(
  // Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous active-high reset.
  input var logic rst,

  // Predicted event from jls_predictor is valid.
  input var logic predict_valid,

  // This context stage can accept the current predicted event.
  output logic predict_ready,

  // Forwarded original input sample X.
  input var logic [PIX_WIDTH-1:0] predict_sample,

  // Forwarded pixel coordinate.
  input var logic [12:0] predict_x,

  // Forwarded pixel coordinate.
  input var logic [12:0] predict_y,

  // Forwarded first-pixel flag for strip-local state reset.
  input var logic predict_strip_first_pixel,

  // Forwarded last-pixel flag for strip-local flush handling.
  input var logic predict_strip_last_pixel,

  // JPEG-LS reconstructed neighbors.
  input var logic [PIX_WIDTH-1:0] Ra,
  input var logic [PIX_WIDTH-1:0] Rb,
  input var logic [PIX_WIDTH-1:0] Rc,
  input var logic [PIX_WIDTH-1:0] Rd,

  // JPEG-LS MED prediction value Px.
  input var logic [PIX_WIDTH-1:0] Px,

  // JPEG-LS threshold and NEAR values for the current strip frame.
  input var logic [15:0] T1,
  input var logic [15:0] T2,
  input var logic [15:0] T3,
  input var logic [5:0] NEAR,

  // Quantized context event is valid.
  output logic context_valid,

  // Downstream context memory/error stage accepted this event.
  input var logic context_ready,

  // Forwarded original input sample X.
  output logic [PIX_WIDTH-1:0] context_sample,

  // Forwarded pixel coordinate.
  output logic [12:0] context_x,

  // Forwarded pixel coordinate.
  output logic [12:0] context_y,

  // Forwarded strip boundary flags.
  output logic context_strip_first_pixel,
  output logic context_strip_last_pixel,

  // Forwarded predictor and neighbors.
  output logic [PIX_WIDTH-1:0] context_Px,
  output logic [PIX_WIDTH-1:0] context_Ra,
  output logic [PIX_WIDTH-1:0] context_Rb,
  output logic [PIX_WIDTH-1:0] context_Rc,
  output logic [PIX_WIDTH-1:0] context_Rd,

  // Standard quantized gradients Q1/Q2/Q3.
  output logic signed [3:0] Q1,
  output logic signed [3:0] Q2,
  output logic signed [3:0] Q3,

  // Absolute regular-mode context index, 0..364.
  output logic [8:0] context_index,

  // Context sign is high when the unnormalized context value is negative.
  output logic context_negative,

  // High when Q1=Q2=Q3=0, which selects JPEG-LS run mode.
  output logic run_mode_context
);

  // Signed gradient width, enough for -(2^PIX_WIDTH-1)..(2^PIX_WIDTH-1).
  localparam int DI_WIDTH = PIX_WIDTH + 1;

  // ---------------------------------------------------------------------
  // Payload bundles carried by the pipeline ranks.
  // ---------------------------------------------------------------------

  // Fields that travel unchanged from the predictor to the context output.
  typedef struct packed {
    logic [PIX_WIDTH-1:0] sample;
    logic [12:0]          x;
    logic [12:0]          y;
    logic                 strip_first_pixel;
    logic                 strip_last_pixel;
    logic [PIX_WIDTH-1:0] Px;
    logic [PIX_WIDTH-1:0] Ra;
    logic [PIX_WIDTH-1:0] Rb;
    logic [PIX_WIDTH-1:0] Rc;
    logic [PIX_WIDTH-1:0] Rd;
  } pixel_event_t;

  // Pixel event plus the strip constants latched alongside it.
  typedef struct packed {
    pixel_event_t pix;
    logic [15:0]  T1;
    logic [15:0]  T2;
    logic [15:0]  T3;
    logic [5:0]   NEAR;
  } gradient_event_t;

  // Pixel event plus its three quantized gradients.
  typedef struct packed {
    pixel_event_t      pix;
    logic signed [3:0] Q1;
    logic signed [3:0] Q2;
    logic signed [3:0] Q3;
  } quantized_event_t;

  // Fully resolved context event as presented on the output ports.
  typedef struct packed {
    pixel_event_t      pix;
    logic signed [3:0] Q1;
    logic signed [3:0] Q2;
    logic signed [3:0] Q3;
    logic [8:0]        index;
    logic              negative;
    logic              run_mode;
  } context_event_t;

  // ---------------------------------------------------------------------
  // Pipeline state.
  // ---------------------------------------------------------------------

  // Gradient stage: registered predictor event with T1/T2/T3/NEAR. Latching
  // the strip constants with the pixel keeps them from driving the compare
  // tree in the same cycle they arrive.
  logic            grad_valid;
  gradient_event_t grad_ev;

  // Input skid slot: holds one extra predictor event while the gradient
  // stage is occupied and cannot advance.
  logic            grad_skid_valid;
  gradient_event_t grad_skid_ev;

  // Quantized stage: registered Q1/Q2/Q3, separating the threshold compares
  // from the context-index adders.
  logic             quant_valid;
  quantized_event_t quant_ev;

  // Output skid slot: holds one resolved context event while the output
  // register is occupied and not being accepted.
  logic           out_skid_valid;
  context_event_t out_skid_ev;

  // ---------------------------------------------------------------------
  // Combinational helpers.
  // ---------------------------------------------------------------------

  // Predictor-side ports bundled into one event.
  gradient_event_t predict_ev;

  // Standard local gradients D1/D2/D3 of the event in the gradient stage.
  logic signed [DI_WIDTH-1:0] D1;
  logic signed [DI_WIDTH-1:0] D2;
  logic signed [DI_WIDTH-1:0] D3;

  // Quantizer outputs for the event in the gradient stage.
  logic signed [3:0] q1_next;
  logic signed [3:0] q2_next;
  logic signed [3:0] q3_next;

  // Signed context value (Q1 * 9 + Q2) * 9 + Q3 and derived flags for the
  // event in the quantized stage.
  logic signed [9:0] q1_wide;
  logic signed [9:0] q2_wide;
  logic signed [9:0] q3_wide;
  logic signed [9:0] ctx_inner;
  logic signed [9:0] ctx_signed;
  logic signed [9:0] ctx_magnitude;
  logic              ctx_is_negative;
  logic              ctx_is_run;

  // Quantized-stage event resolved into output form, and the event selected
  // to load the output register this cycle.
  context_event_t quant_as_ctx;
  context_event_t out_load;

  // Handshake terms.
  logic out_fire;           // output event accepted downstream
  logic out_slot_free;      // output register empty or being emptied
  logic quant_leave;        // quantized stage hands its event onward
  logic out_take_quant;     // output register loads from quantized stage
  logic out_skid_fill;      // output skid slot loads from quantized stage
  logic out_skid_drain;     // output skid slot moves to output register
  logic quant_open;         // quantized stage can take a new event
  logic grad_advance;       // gradient stage moves to quantized stage
  logic predict_fire;       // predictor event accepted
  logic grad_slot_free;     // gradient stage empty or being emptied
  logic grad_take_predict;  // gradient stage loads from predictor
  logic grad_skid_fill;     // input skid slot loads from predictor
  logic grad_skid_drain;    // input skid slot moves to gradient stage

  always_comb begin
    predict_ev.pix.sample            = predict_sample;
    predict_ev.pix.x                 = predict_x;
    predict_ev.pix.y                 = predict_y;
    predict_ev.pix.strip_first_pixel = predict_strip_first_pixel;
    predict_ev.pix.strip_last_pixel  = predict_strip_last_pixel;
    predict_ev.pix.Px                = Px;
    predict_ev.pix.Ra                = Ra;
    predict_ev.pix.Rb                = Rb;
    predict_ev.pix.Rc                = Rc;
    predict_ev.pix.Rd                = Rd;
    predict_ev.T1                    = T1;
    predict_ev.T2                    = T2;
    predict_ev.T3                    = T3;
    predict_ev.NEAR                  = NEAR;
  end

  // D1 = Rd - Rb, D2 = Rb - Rc, D3 = Rc - Ra, each zero-extended by one bit
  // before the signed subtraction.
  always_comb begin
    D1 = $signed({1'b0, grad_ev.pix.Rd}) - $signed({1'b0, grad_ev.pix.Rb});
    D2 = $signed({1'b0, grad_ev.pix.Rb}) - $signed({1'b0, grad_ev.pix.Rc});
    D3 = $signed({1'b0, grad_ev.pix.Rc}) - $signed({1'b0, grad_ev.pix.Ra});
  end

  jls_gradient_quantize_one #(
    .DI_WIDTH(DI_WIDTH)
  ) q1_quantizer (
    .Di(D1),
    .T1(grad_ev.T1),
    .T2(grad_ev.T2),
    .T3(grad_ev.T3),
    .NEAR(grad_ev.NEAR),
    .Qi(q1_next)
  );

  jls_gradient_quantize_one #(
    .DI_WIDTH(DI_WIDTH)
  ) q2_quantizer (
    .Di(D2),
    .T1(grad_ev.T1),
    .T2(grad_ev.T2),
    .T3(grad_ev.T3),
    .NEAR(grad_ev.NEAR),
    .Qi(q2_next)
  );

  jls_gradient_quantize_one #(
    .DI_WIDTH(DI_WIDTH)
  ) q3_quantizer (
    .Di(D3),
    .T1(grad_ev.T1),
    .T2(grad_ev.T2),
    .T3(grad_ev.T3),
    .NEAR(grad_ev.NEAR),
    .Qi(q3_next)
  );

  // Context value in Horner form. All terms are 10-bit signed, which is the
  // same modulo-1024 arithmetic as summing 81*Q1, 9*Q2 and Q3 directly.
  always_comb begin
    q1_wide = {{6{quant_ev.Q1[3]}}, quant_ev.Q1};
    q2_wide = {{6{quant_ev.Q2[3]}}, quant_ev.Q2};
    q3_wide = {{6{quant_ev.Q3[3]}}, quant_ev.Q3};

    ctx_inner  = (q1_wide <<< 3) + q1_wide + q2_wide;      // 9*Q1 + Q2
    ctx_signed = (ctx_inner <<< 3) + ctx_inner + q3_wide;  // 9*(...) + Q3

    ctx_is_negative = 1'b0;
    if (ctx_signed < 10'sd0) begin
      ctx_is_negative = 1'b1;
    end

    ctx_magnitude = ctx_is_negative ? -ctx_signed : ctx_signed;

    ctx_is_run = 1'b0;
    if ({quant_ev.Q1, quant_ev.Q2, quant_ev.Q3} == 12'd0) begin
      ctx_is_run = 1'b1;
    end
  end

  // Handshake resolution, evaluated from the output back toward the input.
  always_comb begin
    out_fire = 1'b0;
    if (context_valid && context_ready) begin
      out_fire = 1'b1;
    end

    out_slot_free = 1'b0;
    if (!context_valid || out_fire) begin
      out_slot_free = 1'b1;
    end

    // The quantized stage leaves whenever the output skid slot is empty;
    // the event lands in the output register if that is free, otherwise in
    // the skid slot.
    quant_leave = 1'b0;
    if (quant_valid && !out_skid_valid) begin
      quant_leave = 1'b1;
    end

    out_take_quant = quant_leave && out_slot_free;
    out_skid_fill  = quant_leave && !out_slot_free;

    out_skid_drain = 1'b0;
    if (out_skid_valid && out_slot_free) begin
      out_skid_drain = 1'b1;
    end

    quant_open = 1'b0;
    if (!quant_valid || quant_leave) begin
      quant_open = 1'b1;
    end

    grad_advance = 1'b0;
    if (grad_valid && quant_open) begin
      grad_advance = 1'b1;
    end

    // Input readiness looks only at the local skid slot.
    predict_ready = 1'b0;
    if (!grad_skid_valid) begin
      predict_ready = 1'b1;
    end

    predict_fire = 1'b0;
    if (predict_valid && predict_ready) begin
      predict_fire = 1'b1;
    end

    grad_slot_free = 1'b0;
    if (!grad_valid || grad_advance) begin
      grad_slot_free = 1'b1;
    end

    grad_take_predict = predict_fire && grad_slot_free;
    grad_skid_fill    = predict_fire && !grad_slot_free;

    grad_skid_drain = 1'b0;
    if (grad_advance && grad_skid_valid) begin
      grad_skid_drain = 1'b1;
    end
  end

  // Resolve the quantized-stage event into output form and pick the source
  // for the output register: the quantized stage when it feeds the output
  // directly, otherwise the output skid slot.
  always_comb begin
    quant_as_ctx.pix      = quant_ev.pix;
    quant_as_ctx.Q1       = quant_ev.Q1;
    quant_as_ctx.Q2       = quant_ev.Q2;
    quant_as_ctx.Q3       = quant_ev.Q3;
    quant_as_ctx.index    = ctx_magnitude[8:0];
    quant_as_ctx.negative = ctx_is_negative;
    quant_as_ctx.run_mode = ctx_is_run;

    out_load = out_skid_ev;
    if (out_take_quant) begin
      out_load = quant_as_ctx;
    end
  end

  always_ff @(posedge clk) begin
    if (rst) begin
      grad_valid      <= 1'b0;
      grad_ev         <= '0;
      grad_skid_valid <= 1'b0;
      grad_skid_ev    <= '0;
      quant_valid     <= 1'b0;
      quant_ev        <= '0;
      out_skid_valid  <= 1'b0;
      out_skid_ev     <= '0;

      context_valid             <= 1'b0;
      context_sample            <= {PIX_WIDTH{1'b0}};
      context_x                 <= 13'd0;
      context_y                 <= 13'd0;
      context_strip_first_pixel <= 1'b0;
      context_strip_last_pixel  <= 1'b0;
      context_Px                <= {PIX_WIDTH{1'b0}};
      context_Ra                <= {PIX_WIDTH{1'b0}};
      context_Rb                <= {PIX_WIDTH{1'b0}};
      context_Rc                <= {PIX_WIDTH{1'b0}};
      context_Rd                <= {PIX_WIDTH{1'b0}};
      Q1                        <= 4'sd0;
      Q2                        <= 4'sd0;
      Q3                        <= 4'sd0;
      context_index             <= 9'd0;
      context_negative          <= 1'b0;
      run_mode_context          <= 1'b0;
    end else begin
      // Output register: reload from the selected source, or go empty when
      // the current event is accepted and nothing replaces it.
      if (out_take_quant || out_skid_drain) begin
        context_valid             <= 1'b1;
        context_sample            <= out_load.pix.sample;
        context_x                 <= out_load.pix.x;
        context_y                 <= out_load.pix.y;
        context_strip_first_pixel <= out_load.pix.strip_first_pixel;
        context_strip_last_pixel  <= out_load.pix.strip_last_pixel;
        context_Px                <= out_load.pix.Px;
        context_Ra                <= out_load.pix.Ra;
        context_Rb                <= out_load.pix.Rb;
        context_Rc                <= out_load.pix.Rc;
        context_Rd                <= out_load.pix.Rd;
        Q1                        <= out_load.Q1;
        Q2                        <= out_load.Q2;
        Q3                        <= out_load.Q3;
        context_index             <= out_load.index;
        context_negative          <= out_load.negative;
        run_mode_context          <= out_load.run_mode;
      end else if (out_fire) begin
        context_valid <= 1'b0;
      end

      // Output skid slot.
      if (out_skid_fill) begin
        out_skid_valid <= 1'b1;
        out_skid_ev    <= quant_as_ctx;
      end else if (out_skid_drain) begin
        out_skid_valid <= 1'b0;
      end

      // Quantized stage: a new event from the gradient stage takes priority
      // over simply emptying the rank.
      if (grad_advance) begin
        quant_valid  <= 1'b1;
        quant_ev.pix <= grad_ev.pix;
        quant_ev.Q1  <= q1_next;
        quant_ev.Q2  <= q2_next;
        quant_ev.Q3  <= q3_next;
      end else if (quant_leave) begin
        quant_valid <= 1'b0;
      end

      // Gradient stage: refill from the predictor or the input skid slot,
      // otherwise go empty when the held event advances.
      if (grad_take_predict) begin
        grad_valid <= 1'b1;
        grad_ev    <= predict_ev;
      end else if (grad_skid_drain) begin
        grad_valid <= 1'b1;
        grad_ev    <= grad_skid_ev;
      end else if (grad_advance) begin
        grad_valid <= 1'b0;
      end

      // Input skid slot.
      if (grad_skid_fill) begin
        grad_skid_valid <= 1'b1;
        grad_skid_ev    <= predict_ev;
      end else if (grad_skid_drain) begin
        grad_skid_valid <= 1'b0;
      end
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
960
fpga/verilog/jls_context_update.sv
Normal file
960
fpga/verilog/jls_context_update.sv
Normal file
@@ -0,0 +1,960 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 Golomb parameter, Annex A.6 variables update
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Compute k and update regular-mode A/B/C/N
|
||||
// Trace : docs/jls_traceability.md#context-update
|
||||
// Example : A=4,N=1,Errval=3 gives k=2 before A is updated to 7.
|
||||
//
|
||||
// Pipelined single-context update arithmetic. Stage 0 captures the Annex A.6
|
||||
// input context. Stage 1 registers the odd-scale multiplier operands for
|
||||
// Errval*(2*NEAR+1) and carries the Annex A.5 Golomb k decision. Stage 2
|
||||
// captures the scaled product. Stage 3 accumulates B[Q] and performs
|
||||
// RESET/bias correction. The split keeps NEAR-driven arithmetic short at the
|
||||
// 250 MHz target.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_context_update (
|
||||
// Main 250 MHz clock.
|
||||
input var logic clk,
|
||||
|
||||
// Synchronous active-high reset.
|
||||
input var logic rst,
|
||||
|
||||
// Input update event is valid.
|
||||
input var logic update_valid,
|
||||
|
||||
// This stage can accept the update event.
|
||||
output logic update_ready,
|
||||
|
||||
// Standard context variables before update.
|
||||
input var logic [31:0] A_in,
|
||||
input var logic signed [31:0] B_in,
|
||||
input var logic signed [8:0] C_in,
|
||||
input var logic [15:0] N_in,
|
||||
|
||||
// Quantized prediction error Errval for this context.
|
||||
input var logic signed [31:0] Errval,
|
||||
|
||||
// Context and strip metadata forwarded with Errval.
|
||||
input var logic [8:0] context_index_in,
|
||||
input var logic strip_last_pixel_in,
|
||||
|
||||
// Coding parameters forwarded for the Golomb encoder.
|
||||
input var logic [4:0] qbpp_in,
|
||||
input var logic [6:0] LIMIT_in,
|
||||
|
||||
// JPEG-LS NEAR parameter for this strip.
|
||||
input var logic [5:0] NEAR,
|
||||
|
||||
// JPEG-LS RESET parameter, normally 64.
|
||||
input var logic [15:0] RESET,
|
||||
|
||||
// Output updated context event is valid.
|
||||
output logic result_valid,
|
||||
|
||||
// Downstream context table accepted this result.
|
||||
input var logic result_ready,
|
||||
|
||||
// Golomb parameter computed from A_in and N_in before the update.
|
||||
output logic [4:0] k,
|
||||
|
||||
// Forwarded Errval for the downstream error mapper.
|
||||
output logic signed [31:0] Errval_out,
|
||||
|
||||
// Forwarded context and strip metadata.
|
||||
output logic [8:0] context_index_out,
|
||||
output logic strip_last_pixel_out,
|
||||
|
||||
// Forwarded coding parameters.
|
||||
output logic [4:0] qbpp_out,
|
||||
output logic [6:0] LIMIT_out,
|
||||
|
||||
// High when get_error_correction(k | NEAR) requests mapping inversion.
|
||||
output logic map_invert,
|
||||
|
||||
// Standard context variables after update.
|
||||
output logic [31:0] A_out,
|
||||
output logic signed [31:0] B_out,
|
||||
output logic signed [8:0] C_out,
|
||||
output logic [15:0] N_out
|
||||
);
|
||||
|
||||
// Signed and absolute forms of Errval.
|
||||
logic signed [32:0] Errval_ext;
|
||||
logic [32:0] abs_Errval_ext;
|
||||
|
||||
// Stage-1 update terms from Annex A.6.
|
||||
logic signed [7:0] near_scale;
|
||||
logic signed [40:0] B_delta;
|
||||
logic [31:0] A_accum_next;
|
||||
logic signed [40:0] B_accum_next;
|
||||
logic [15:0] N_halved_plus_one_next;
|
||||
|
||||
// Golomb parameter compare terms from A_in and N_in.
|
||||
logic [31:0] N_shift_0;
|
||||
logic [31:0] N_shift_1;
|
||||
logic [31:0] N_shift_2;
|
||||
logic [31:0] N_shift_3;
|
||||
logic [31:0] N_shift_4;
|
||||
logic [31:0] N_shift_5;
|
||||
logic [31:0] N_shift_6;
|
||||
logic [31:0] N_shift_7;
|
||||
logic [31:0] N_shift_8;
|
||||
logic [31:0] N_shift_9;
|
||||
logic [31:0] N_shift_10;
|
||||
logic [31:0] N_shift_11;
|
||||
logic [31:0] N_shift_12;
|
||||
logic [31:0] N_shift_13;
|
||||
logic [31:0] N_shift_14;
|
||||
logic [31:0] N_shift_15;
|
||||
logic [31:0] N_shift_16;
|
||||
logic [4:0] k_next;
|
||||
logic k_or_near_is_zero;
|
||||
logic signed [32:0] map_bias_check;
|
||||
logic map_invert_next;
|
||||
|
||||
// Stage-0 registered input payload. These are raw standard variables from
|
||||
// the context table and regular-mode Errval path.
|
||||
logic s0_valid;
|
||||
logic [31:0] s0_A_in;
|
||||
logic signed [31:0] s0_B_in;
|
||||
logic signed [8:0] s0_C_in;
|
||||
logic [15:0] s0_N_in;
|
||||
logic signed [31:0] s0_Errval;
|
||||
logic [8:0] s0_context_index;
|
||||
logic s0_strip_last_pixel;
|
||||
logic [4:0] s0_qbpp;
|
||||
logic [6:0] s0_LIMIT;
|
||||
logic [5:0] s0_NEAR;
|
||||
logic [15:0] s0_RESET;
|
||||
|
||||
// One-entry input skid slot. It keeps update_ready dependent only on local
|
||||
// queue fullness instead of the result_ready/context-write/error-map chain.
|
||||
// Example: if stage 0 is blocked for one cycle, the next regular Errval can
|
||||
// be captured here without propagating downstream backpressure to the
|
||||
// regular error quantizer CE path.
|
||||
logic update_next_valid;
|
||||
logic [31:0] update_next_A_in;
|
||||
logic signed [31:0] update_next_B_in;
|
||||
logic signed [8:0] update_next_C_in;
|
||||
logic [15:0] update_next_N_in;
|
||||
logic signed [31:0] update_next_Errval;
|
||||
logic [8:0] update_next_context_index;
|
||||
logic update_next_strip_last_pixel;
|
||||
logic [4:0] update_next_qbpp;
|
||||
logic [6:0] update_next_LIMIT;
|
||||
logic [5:0] update_next_NEAR;
|
||||
logic [15:0] update_next_RESET;
|
||||
|
||||
// Stage-1 registered multiplier operands/update payload. s1_Errval_ext and
|
||||
// s1_near_scale are the registered odd-scale multiply operands for the
|
||||
// Annex A.6 Errval*(2*NEAR+1) term.
|
||||
logic s1_valid;
|
||||
logic [31:0] s1_A_accum;
|
||||
logic signed [31:0] s1_B_in;
|
||||
logic signed [8:0] s1_C_in;
|
||||
logic [15:0] s1_N_in;
|
||||
logic [15:0] s1_N_halved_plus_one;
|
||||
logic [15:0] s1_RESET;
|
||||
logic [4:0] s1_k;
|
||||
logic signed [31:0] s1_Errval;
|
||||
logic [8:0] s1_context_index;
|
||||
logic s1_strip_last_pixel;
|
||||
logic [4:0] s1_qbpp;
|
||||
logic [6:0] s1_LIMIT;
|
||||
logic s1_map_invert;
|
||||
logic signed [32:0] s1_Errval_ext;
|
||||
logic signed [7:0] s1_near_scale;
|
||||
|
||||
// Stage-2 registered product/update payload. s2_B_delta is the registered
|
||||
// scaled Errval term for the Annex A.6 B[Q] update before the following
|
||||
// carry-chain add.
|
||||
logic s2_valid;
|
||||
logic [31:0] s2_A_accum;
|
||||
logic signed [31:0] s2_B_in;
|
||||
logic signed [40:0] s2_B_delta;
|
||||
logic signed [8:0] s2_C_in;
|
||||
logic [15:0] s2_N_in;
|
||||
logic [15:0] s2_N_halved_plus_one;
|
||||
logic [15:0] s2_RESET;
|
||||
logic [4:0] s2_k;
|
||||
logic signed [31:0] s2_Errval;
|
||||
logic [8:0] s2_context_index;
|
||||
logic s2_strip_last_pixel;
|
||||
logic [4:0] s2_qbpp;
|
||||
logic [6:0] s2_LIMIT;
|
||||
logic s2_map_invert;
|
||||
|
||||
// Registered stage-3 payload. These names track the standard A/B/C/N and
|
||||
// Errval variables so the implementation can be compared with Annex A.6.
|
||||
logic stage_valid;
|
||||
logic [31:0] stage_A_accum;
|
||||
logic signed [40:0] stage_B_accum;
|
||||
logic signed [8:0] stage_C_in;
|
||||
logic [15:0] stage_N_in;
|
||||
logic [15:0] stage_N_halved_plus_one;
|
||||
logic [15:0] stage_RESET;
|
||||
logic [4:0] stage_k;
|
||||
logic signed [31:0] stage_Errval;
|
||||
logic [8:0] stage_context_index;
|
||||
logic stage_strip_last_pixel;
|
||||
logic [4:0] stage_qbpp;
|
||||
logic [6:0] stage_LIMIT;
|
||||
logic stage_map_invert;
|
||||
|
||||
// Registered pre-bias stage. This stage holds the Annex A.6 variables after
|
||||
// RESET folding but before B[Q]/C[Q] bias correction.
|
||||
logic prebias_valid;
|
||||
logic [31:0] prebias_A_after_reset;
|
||||
logic signed [40:0] prebias_B_after_reset;
|
||||
logic signed [8:0] prebias_C_in;
|
||||
logic [15:0] prebias_N_after_increment;
|
||||
logic [4:0] prebias_k;
|
||||
logic signed [31:0] prebias_Errval;
|
||||
logic [8:0] prebias_context_index;
|
||||
logic prebias_strip_last_pixel;
|
||||
logic [4:0] prebias_qbpp;
|
||||
logic [6:0] prebias_LIMIT;
|
||||
logic prebias_map_invert;
|
||||
|
||||
// Registered bias-correction stage. The corrected B[Q]/C[Q] values are
|
||||
// stored here so the public result path does not have to re-run the bias
|
||||
// compare/add/saturate logic in the same cycle.
|
||||
logic bias_valid;
|
||||
logic [31:0] bias_A_after_reset;
|
||||
logic signed [31:0] bias_B_after_bias;
|
||||
logic signed [8:0] bias_C_after_bias;
|
||||
logic [15:0] bias_N_after_increment;
|
||||
logic [4:0] bias_k;
|
||||
logic signed [31:0] bias_Errval;
|
||||
logic [8:0] bias_context_index;
|
||||
logic bias_strip_last_pixel;
|
||||
logic [4:0] bias_qbpp;
|
||||
logic [6:0] bias_LIMIT;
|
||||
logic bias_map_invert;
|
||||
|
||||
// One-entry output skid slot. The bias stage may retire into this slot when the
|
||||
// public result register is still waiting for context writeback or error
|
||||
// mapper acceptance. This breaks the downstream ready chain from feeding
|
||||
// back through every context-update pipeline CE in one 250 MHz cycle.
|
||||
logic result_next_valid;
|
||||
logic [4:0] result_next_k;
|
||||
logic signed [31:0] result_next_Errval_out;
|
||||
logic [8:0] result_next_context_index_out;
|
||||
logic result_next_strip_last_pixel_out;
|
||||
logic [4:0] result_next_qbpp_out;
|
||||
logic [6:0] result_next_LIMIT_out;
|
||||
logic result_next_map_invert;
|
||||
logic [31:0] result_next_A_out;
|
||||
logic signed [31:0] result_next_B_out;
|
||||
logic signed [8:0] result_next_C_out;
|
||||
logic [15:0] result_next_N_out;
|
||||
|
||||
// RESET-folding intermediates (from the stage-3 registers) and bias-correction intermediates (from the pre-bias registers).
|
||||
logic [31:0] A_after_reset;
|
||||
logic signed [40:0] B_after_reset;
|
||||
logic [15:0] N_after_increment;
|
||||
logic signed [40:0] bias_stage_N_signed_ext;
|
||||
logic signed [40:0] bias_stage_negative_N_plus_one;
|
||||
logic signed [40:0] bias_stage_B_plus_N;
|
||||
logic signed [40:0] bias_stage_B_minus_N;
|
||||
logic signed [40:0] bias_stage_B_after_bias;
|
||||
logic signed [8:0] bias_stage_C_after_bias;
|
||||
|
||||
// Handshake terms.
|
||||
logic result_slot_open;
|
||||
logic bias_to_result;
|
||||
logic bias_open;
|
||||
logic prebias_to_bias;
|
||||
logic prebias_open;
|
||||
logic stage_to_prebias;
|
||||
logic stage_open;
|
||||
logic s2_to_stage;
|
||||
logic s2_open;
|
||||
logic s1_to_s2;
|
||||
logic s1_open;
|
||||
logic s0_to_s1;
|
||||
logic s0_open;
|
||||
logic accept_update;
|
||||
logic update_load_input;
|
||||
logic update_store_next;
|
||||
logic s0_promote_next;
|
||||
logic result_accept;
|
||||
logic result_direct_from_bias;
|
||||
logic result_store_next;
|
||||
logic result_promote_next;
|
||||
|
||||
// Shared narrow-scale multiplier for Annex A.6 Errval*(2*NEAR+1).
// Both operands are stage-1 registers (s1_Errval_ext, s1_near_scale); the
// product B_delta is captured into s2_B_delta when stage 1 advances to stage 2.
// NOTE(review): s1_near_scale is 8 bits wide and is loaded from near_scale,
// which is built as {1'b0, s0_NEAR, 1'b1}. Only bits [5:0] are connected
// below, so bit 6 of 2*NEAR+1 is dropped whenever NEAR >= 32. The width of
// near_scale_i and the supported NEAR range are not visible here - confirm
// that NEAR is limited to 0..31 or widen the connection.
|
||||
jls_near_scale_mul #(
|
||||
.INPUT_WIDTH(33),
|
||||
.OUTPUT_WIDTH(41)
|
||||
) context_update_near_scale_mul_i (
|
||||
.multiplicand_i(s1_Errval_ext),
|
||||
.near_scale_i(s1_near_scale[5:0]),
|
||||
.product_o(B_delta)
|
||||
);
|
||||
|
||||
always_comb begin
|
||||
Errval_ext = {s0_Errval[31], s0_Errval};
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
abs_Errval_ext = Errval_ext[32:0];
|
||||
if (Errval_ext < 33'sd0) begin
|
||||
abs_Errval_ext = -Errval_ext;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
near_scale = $signed({1'b0, s0_NEAR, 1'b1});
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
A_accum_next = s0_A_in + abs_Errval_ext[31:0];
|
||||
B_accum_next = {{9{s2_B_in[31]}}, s2_B_in} + s2_B_delta;
|
||||
N_halved_plus_one_next = (s0_N_in >> 1) + 16'd1;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
N_shift_0 = {16'd0, s0_N_in};
|
||||
N_shift_1 = {15'd0, s0_N_in, 1'd0};
|
||||
N_shift_2 = {14'd0, s0_N_in, 2'd0};
|
||||
N_shift_3 = {13'd0, s0_N_in, 3'd0};
|
||||
N_shift_4 = {12'd0, s0_N_in, 4'd0};
|
||||
N_shift_5 = {11'd0, s0_N_in, 5'd0};
|
||||
N_shift_6 = {10'd0, s0_N_in, 6'd0};
|
||||
N_shift_7 = {9'd0, s0_N_in, 7'd0};
|
||||
N_shift_8 = {8'd0, s0_N_in, 8'd0};
|
||||
N_shift_9 = {7'd0, s0_N_in, 9'd0};
|
||||
N_shift_10 = {6'd0, s0_N_in, 10'd0};
|
||||
N_shift_11 = {5'd0, s0_N_in, 11'd0};
|
||||
N_shift_12 = {4'd0, s0_N_in, 12'd0};
|
||||
N_shift_13 = {3'd0, s0_N_in, 13'd0};
|
||||
N_shift_14 = {2'd0, s0_N_in, 14'd0};
|
||||
N_shift_15 = {1'd0, s0_N_in, 15'd0};
|
||||
N_shift_16 = {s0_N_in, 16'd0};
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
k_next = 5'd16;
|
||||
case (1'b1)
|
||||
(N_shift_0 >= s0_A_in): begin
|
||||
k_next = 5'd0;
|
||||
end
|
||||
|
||||
(N_shift_1 >= s0_A_in): begin
|
||||
k_next = 5'd1;
|
||||
end
|
||||
|
||||
(N_shift_2 >= s0_A_in): begin
|
||||
k_next = 5'd2;
|
||||
end
|
||||
|
||||
(N_shift_3 >= s0_A_in): begin
|
||||
k_next = 5'd3;
|
||||
end
|
||||
|
||||
(N_shift_4 >= s0_A_in): begin
|
||||
k_next = 5'd4;
|
||||
end
|
||||
|
||||
(N_shift_5 >= s0_A_in): begin
|
||||
k_next = 5'd5;
|
||||
end
|
||||
|
||||
(N_shift_6 >= s0_A_in): begin
|
||||
k_next = 5'd6;
|
||||
end
|
||||
|
||||
(N_shift_7 >= s0_A_in): begin
|
||||
k_next = 5'd7;
|
||||
end
|
||||
|
||||
(N_shift_8 >= s0_A_in): begin
|
||||
k_next = 5'd8;
|
||||
end
|
||||
|
||||
(N_shift_9 >= s0_A_in): begin
|
||||
k_next = 5'd9;
|
||||
end
|
||||
|
||||
(N_shift_10 >= s0_A_in): begin
|
||||
k_next = 5'd10;
|
||||
end
|
||||
|
||||
(N_shift_11 >= s0_A_in): begin
|
||||
k_next = 5'd11;
|
||||
end
|
||||
|
||||
(N_shift_12 >= s0_A_in): begin
|
||||
k_next = 5'd12;
|
||||
end
|
||||
|
||||
(N_shift_13 >= s0_A_in): begin
|
||||
k_next = 5'd13;
|
||||
end
|
||||
|
||||
(N_shift_14 >= s0_A_in): begin
|
||||
k_next = 5'd14;
|
||||
end
|
||||
|
||||
(N_shift_15 >= s0_A_in): begin
|
||||
k_next = 5'd15;
|
||||
end
|
||||
|
||||
default: begin
|
||||
k_next = 5'd16;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// Map-inversion decision, evaluated on the stage-0 (pre-update) context
// values. The three blocks below together compute
//   map_invert_next = (k_next == 0) && (NEAR == 0) && (2*B + N - 1 < 0).
// The result is registered into s1_map_invert when stage 0 advances.
// NOTE(review): this looks like the special-mapping condition of T.87
// A.5.2 (NEAR == 0 && k == 0 && 2*B[Q] <= -N[Q]) - confirm against the
// standard text.

// Despite the "or" in its name, this flag is set only when BOTH k_next and
// s0_NEAR are zero.
always_comb begin
|
||||
k_or_near_is_zero = 1'b0;
|
||||
if (k_next == 5'd0 && s0_NEAR == 6'd0) begin
|
||||
k_or_near_is_zero = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
// 33-bit value 2*B + N - 1, using the sign-extended s0_B_in twice and the
// zero-extended s0_N_in. For integers, (2*B + N - 1 < 0) is the same test
// as (2*B <= -N).
always_comb begin
|
||||
map_bias_check = {s0_B_in[31], s0_B_in} + {s0_B_in[31], s0_B_in} + $signed({17'd0, s0_N_in}) - 33'sd1;
|
||||
end
|
||||
|
||||
// Request inversion only when both the k/NEAR flag and the negative bias
// check hold.
always_comb begin
|
||||
map_invert_next = 1'b0;
|
||||
if (k_or_near_is_zero && map_bias_check < 33'sd0) begin
|
||||
map_invert_next = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
// RESET folding on the stage-3 registers. The outputs A_after_reset,
// B_after_reset and N_after_increment are captured into the pre-bias
// registers when stage 3 advances.
always_comb begin
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.6 variables update
|
||||
// Pseudocode : If N[Q] == RESET then halve A[Q], B[Q], and N[Q].
|
||||
// Default path (no RESET hit): pass A and B through and increment N.
A_after_reset = stage_A_accum;
|
||||
B_after_reset = stage_B_accum;
|
||||
N_after_increment = stage_N_in + 16'd1;
|
||||
if (stage_N_in == stage_RESET) begin
|
||||
A_after_reset = stage_A_accum >> 1;
|
||||
// stage_B_accum is declared signed, so >>> is an arithmetic shift:
// floor(B/2). For negative B this equals -((1 - B) >> 1).
// NOTE(review): that is the form the T.87 pseudocode is expected to use -
// confirm against the standard text.
B_after_reset = stage_B_accum >>> 1;
|
||||
// stage_N_halved_plus_one carries (N >> 1) + 1, precomputed from s0_N_in
// in stage 0 and passed down the pipeline with N itself.
N_after_increment = stage_N_halved_plus_one;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
bias_stage_N_signed_ext = $signed({25'd0, prebias_N_after_increment});
|
||||
bias_stage_negative_N_plus_one = -bias_stage_N_signed_ext + 41'sd1;
|
||||
bias_stage_B_plus_N = prebias_B_after_reset + bias_stage_N_signed_ext;
|
||||
bias_stage_B_minus_N = prebias_B_after_reset - bias_stage_N_signed_ext;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.6 variables update
|
||||
// Pseudocode : Bias correction for B[Q] and C[Q].
|
||||
// Stage note : Compute the corrected B[Q]/C[Q] from the registered
|
||||
// pre-bias state so RESET folding and bias correction do not sit in the
|
||||
// same timing cone.
|
||||
bias_stage_B_after_bias = prebias_B_after_reset;
|
||||
bias_stage_C_after_bias = prebias_C_in;
|
||||
|
||||
case (1'b1)
|
||||
(bias_stage_B_plus_N <= 41'sd0): begin
|
||||
bias_stage_B_after_bias = bias_stage_B_plus_N;
|
||||
if (bias_stage_B_plus_N <= -bias_stage_N_signed_ext) begin
|
||||
bias_stage_B_after_bias = bias_stage_negative_N_plus_one;
|
||||
end
|
||||
if (prebias_C_in > -9'sd128) begin
|
||||
bias_stage_C_after_bias = prebias_C_in - 9'sd1;
|
||||
end
|
||||
end
|
||||
|
||||
(prebias_B_after_reset > 41'sd0): begin
|
||||
bias_stage_B_after_bias = bias_stage_B_minus_N;
|
||||
if (bias_stage_B_minus_N > 41'sd0) begin
|
||||
bias_stage_B_after_bias = 41'sd0;
|
||||
end
|
||||
if (prebias_C_in < 9'sd127) begin
|
||||
bias_stage_C_after_bias = prebias_C_in + 9'sd1;
|
||||
end
|
||||
end
|
||||
|
||||
default: begin
|
||||
bias_stage_B_after_bias = prebias_B_after_reset;
|
||||
bias_stage_C_after_bias = prebias_C_in;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
result_accept = 1'b0;
|
||||
if (result_valid && result_ready) begin
|
||||
result_accept = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
// The bias stage can retire whenever the local second output slot is free.
|
||||
// It does not need to see result_ready from the downstream context
|
||||
// write/error mapper path in the same cycle.
|
||||
result_slot_open = !result_next_valid;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
bias_to_result = 1'b0;
|
||||
if (bias_valid && result_slot_open) begin
|
||||
bias_to_result = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
bias_open = 1'b0;
|
||||
if (!bias_valid || bias_to_result) begin
|
||||
bias_open = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
prebias_to_bias = 1'b0;
|
||||
if (prebias_valid && bias_open) begin
|
||||
prebias_to_bias = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
prebias_open = 1'b0;
|
||||
if (!prebias_valid || prebias_to_bias) begin
|
||||
prebias_open = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
stage_to_prebias = 1'b0;
|
||||
if (stage_valid && prebias_open) begin
|
||||
stage_to_prebias = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
stage_open = 1'b0;
|
||||
if (!stage_valid || stage_to_prebias) begin
|
||||
stage_open = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
s2_to_stage = 1'b0;
|
||||
if (s2_valid && stage_open) begin
|
||||
s2_to_stage = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
s2_open = 1'b0;
|
||||
if (!s2_valid || s2_to_stage) begin
|
||||
s2_open = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
s1_to_s2 = 1'b0;
|
||||
if (s1_valid && s2_open) begin
|
||||
s1_to_s2 = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
s1_open = 1'b0;
|
||||
if (!s1_valid || s1_to_s2) begin
|
||||
s1_open = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
s0_to_s1 = 1'b0;
|
||||
if (s0_valid && s1_open) begin
|
||||
s0_to_s1 = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
s0_open = 1'b0;
|
||||
if (!s0_valid || s0_to_s1) begin
|
||||
s0_open = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
accept_update = 1'b0;
|
||||
if (update_valid && update_ready) begin
|
||||
accept_update = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
result_direct_from_bias = 1'b0;
|
||||
if (bias_to_result && !result_valid) begin
|
||||
result_direct_from_bias = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
result_store_next = 1'b0;
|
||||
if (bias_to_result && result_valid) begin
|
||||
result_store_next = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
result_promote_next = 1'b0;
|
||||
if (result_next_valid && !result_valid) begin
|
||||
result_promote_next = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
update_ready = 1'b0;
|
||||
if (!update_next_valid) begin
|
||||
update_ready = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
update_load_input = 1'b0;
|
||||
if (accept_update && (!s0_valid || s0_to_s1)) begin
|
||||
update_load_input = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
update_store_next = 1'b0;
|
||||
if (accept_update && s0_valid && !s0_to_s1) begin
|
||||
update_store_next = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
s0_promote_next = 1'b0;
|
||||
if (s0_to_s1 && update_next_valid) begin
|
||||
s0_promote_next = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst) begin
|
||||
s0_valid <= 1'b0;
|
||||
s0_A_in <= 32'd0;
|
||||
s0_B_in <= 32'sd0;
|
||||
s0_C_in <= 9'sd0;
|
||||
s0_N_in <= 16'd0;
|
||||
s0_Errval <= 32'sd0;
|
||||
s0_context_index <= 9'd0;
|
||||
s0_strip_last_pixel <= 1'b0;
|
||||
s0_qbpp <= 5'd0;
|
||||
s0_LIMIT <= 7'd0;
|
||||
s0_NEAR <= 6'd0;
|
||||
s0_RESET <= 16'd0;
|
||||
update_next_valid <= 1'b0;
|
||||
update_next_A_in <= 32'd0;
|
||||
update_next_B_in <= 32'sd0;
|
||||
update_next_C_in <= 9'sd0;
|
||||
update_next_N_in <= 16'd0;
|
||||
update_next_Errval <= 32'sd0;
|
||||
update_next_context_index <= 9'd0;
|
||||
update_next_strip_last_pixel <= 1'b0;
|
||||
update_next_qbpp <= 5'd0;
|
||||
update_next_LIMIT <= 7'd0;
|
||||
update_next_NEAR <= 6'd0;
|
||||
update_next_RESET <= 16'd0;
|
||||
s1_valid <= 1'b0;
|
||||
s1_A_accum <= 32'd0;
|
||||
s1_B_in <= 32'sd0;
|
||||
s1_C_in <= 9'sd0;
|
||||
s1_N_in <= 16'd0;
|
||||
s1_N_halved_plus_one <= 16'd0;
|
||||
s1_RESET <= 16'd0;
|
||||
s1_k <= 5'd0;
|
||||
s1_Errval <= 32'sd0;
|
||||
s1_context_index <= 9'd0;
|
||||
s1_strip_last_pixel <= 1'b0;
|
||||
s1_qbpp <= 5'd0;
|
||||
s1_LIMIT <= 7'd0;
|
||||
s1_map_invert <= 1'b0;
|
||||
s1_Errval_ext <= 33'sd0;
|
||||
s1_near_scale <= 8'sd1;
|
||||
s2_valid <= 1'b0;
|
||||
s2_A_accum <= 32'd0;
|
||||
s2_B_in <= 32'sd0;
|
||||
s2_B_delta <= 41'sd0;
|
||||
s2_C_in <= 9'sd0;
|
||||
s2_N_in <= 16'd0;
|
||||
s2_N_halved_plus_one <= 16'd0;
|
||||
s2_RESET <= 16'd0;
|
||||
s2_k <= 5'd0;
|
||||
s2_Errval <= 32'sd0;
|
||||
s2_context_index <= 9'd0;
|
||||
s2_strip_last_pixel <= 1'b0;
|
||||
s2_qbpp <= 5'd0;
|
||||
s2_LIMIT <= 7'd0;
|
||||
s2_map_invert <= 1'b0;
|
||||
stage_valid <= 1'b0;
|
||||
stage_A_accum <= 32'd0;
|
||||
stage_B_accum <= 41'sd0;
|
||||
stage_C_in <= 9'sd0;
|
||||
stage_N_in <= 16'd0;
|
||||
stage_N_halved_plus_one <= 16'd0;
|
||||
stage_RESET <= 16'd0;
|
||||
stage_k <= 5'd0;
|
||||
stage_Errval <= 32'sd0;
|
||||
stage_context_index <= 9'd0;
|
||||
stage_strip_last_pixel <= 1'b0;
|
||||
stage_qbpp <= 5'd0;
|
||||
stage_LIMIT <= 7'd0;
|
||||
stage_map_invert <= 1'b0;
|
||||
prebias_valid <= 1'b0;
|
||||
prebias_A_after_reset <= 32'd0;
|
||||
prebias_B_after_reset <= 41'sd0;
|
||||
prebias_C_in <= 9'sd0;
|
||||
prebias_N_after_increment <= 16'd0;
|
||||
prebias_k <= 5'd0;
|
||||
prebias_Errval <= 32'sd0;
|
||||
prebias_context_index <= 9'd0;
|
||||
prebias_strip_last_pixel <= 1'b0;
|
||||
prebias_qbpp <= 5'd0;
|
||||
prebias_LIMIT <= 7'd0;
|
||||
prebias_map_invert <= 1'b0;
|
||||
bias_valid <= 1'b0;
|
||||
bias_A_after_reset <= 32'd0;
|
||||
bias_B_after_bias <= 32'sd0;
|
||||
bias_C_after_bias <= 9'sd0;
|
||||
bias_N_after_increment <= 16'd0;
|
||||
bias_k <= 5'd0;
|
||||
bias_Errval <= 32'sd0;
|
||||
bias_context_index <= 9'd0;
|
||||
bias_strip_last_pixel <= 1'b0;
|
||||
bias_qbpp <= 5'd0;
|
||||
bias_LIMIT <= 7'd0;
|
||||
bias_map_invert <= 1'b0;
|
||||
result_valid <= 1'b0;
|
||||
result_next_valid <= 1'b0;
|
||||
result_next_k <= 5'd0;
|
||||
result_next_Errval_out <= 32'sd0;
|
||||
result_next_context_index_out <= 9'd0;
|
||||
result_next_strip_last_pixel_out <= 1'b0;
|
||||
result_next_qbpp_out <= 5'd0;
|
||||
result_next_LIMIT_out <= 7'd0;
|
||||
result_next_map_invert <= 1'b0;
|
||||
result_next_A_out <= 32'd0;
|
||||
result_next_B_out <= 32'sd0;
|
||||
result_next_C_out <= 9'sd0;
|
||||
result_next_N_out <= 16'd0;
|
||||
k <= 5'd0;
|
||||
Errval_out <= 32'sd0;
|
||||
context_index_out <= 9'd0;
|
||||
strip_last_pixel_out <= 1'b0;
|
||||
qbpp_out <= 5'd0;
|
||||
LIMIT_out <= 7'd0;
|
||||
map_invert <= 1'b0;
|
||||
A_out <= 32'd0;
|
||||
B_out <= 32'sd0;
|
||||
C_out <= 9'sd0;
|
||||
N_out <= 16'd0;
|
||||
end else begin
|
||||
if (result_accept && !result_promote_next && !result_direct_from_bias) begin
|
||||
result_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (result_promote_next) begin
|
||||
result_valid <= 1'b1;
|
||||
k <= result_next_k;
|
||||
Errval_out <= result_next_Errval_out;
|
||||
context_index_out <= result_next_context_index_out;
|
||||
strip_last_pixel_out <= result_next_strip_last_pixel_out;
|
||||
qbpp_out <= result_next_qbpp_out;
|
||||
LIMIT_out <= result_next_LIMIT_out;
|
||||
map_invert <= result_next_map_invert;
|
||||
A_out <= result_next_A_out;
|
||||
B_out <= result_next_B_out;
|
||||
C_out <= result_next_C_out;
|
||||
N_out <= result_next_N_out;
|
||||
result_next_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (result_direct_from_bias) begin
|
||||
result_valid <= 1'b1;
|
||||
k <= bias_k;
|
||||
Errval_out <= bias_Errval;
|
||||
context_index_out <= bias_context_index;
|
||||
strip_last_pixel_out <= bias_strip_last_pixel;
|
||||
qbpp_out <= bias_qbpp;
|
||||
LIMIT_out <= bias_LIMIT;
|
||||
map_invert <= bias_map_invert;
|
||||
A_out <= bias_A_after_reset;
|
||||
B_out <= bias_B_after_bias;
|
||||
C_out <= bias_C_after_bias;
|
||||
N_out <= bias_N_after_increment;
|
||||
end
|
||||
|
||||
if (result_store_next) begin
|
||||
result_next_valid <= 1'b1;
|
||||
result_next_k <= bias_k;
|
||||
result_next_Errval_out <= bias_Errval;
|
||||
result_next_context_index_out <= bias_context_index;
|
||||
result_next_strip_last_pixel_out <= bias_strip_last_pixel;
|
||||
result_next_qbpp_out <= bias_qbpp;
|
||||
result_next_LIMIT_out <= bias_LIMIT;
|
||||
result_next_map_invert <= bias_map_invert;
|
||||
result_next_A_out <= bias_A_after_reset;
|
||||
result_next_B_out <= bias_B_after_bias;
|
||||
result_next_C_out <= bias_C_after_bias;
|
||||
result_next_N_out <= bias_N_after_increment;
|
||||
end
|
||||
|
||||
if (prebias_to_bias) begin
|
||||
bias_valid <= 1'b1;
|
||||
bias_A_after_reset <= prebias_A_after_reset;
|
||||
bias_B_after_bias <= bias_stage_B_after_bias[31:0];
|
||||
bias_C_after_bias <= bias_stage_C_after_bias;
|
||||
bias_N_after_increment <= prebias_N_after_increment;
|
||||
bias_k <= prebias_k;
|
||||
bias_Errval <= prebias_Errval;
|
||||
bias_context_index <= prebias_context_index;
|
||||
bias_strip_last_pixel <= prebias_strip_last_pixel;
|
||||
bias_qbpp <= prebias_qbpp;
|
||||
bias_LIMIT <= prebias_LIMIT;
|
||||
bias_map_invert <= prebias_map_invert;
|
||||
end else if (bias_to_result) begin
|
||||
bias_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (stage_to_prebias) begin
|
||||
prebias_valid <= 1'b1;
|
||||
prebias_A_after_reset <= A_after_reset;
|
||||
prebias_B_after_reset <= B_after_reset;
|
||||
prebias_C_in <= stage_C_in;
|
||||
prebias_N_after_increment <= N_after_increment;
|
||||
prebias_k <= stage_k;
|
||||
prebias_Errval <= stage_Errval;
|
||||
prebias_context_index <= stage_context_index;
|
||||
prebias_strip_last_pixel <= stage_strip_last_pixel;
|
||||
prebias_qbpp <= stage_qbpp;
|
||||
prebias_LIMIT <= stage_LIMIT;
|
||||
prebias_map_invert <= stage_map_invert;
|
||||
end else if (prebias_to_bias) begin
|
||||
prebias_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (s2_to_stage) begin
|
||||
stage_valid <= 1'b1;
|
||||
stage_A_accum <= s2_A_accum;
|
||||
stage_B_accum <= B_accum_next;
|
||||
stage_C_in <= s2_C_in;
|
||||
stage_N_in <= s2_N_in;
|
||||
stage_N_halved_plus_one <= s2_N_halved_plus_one;
|
||||
stage_RESET <= s2_RESET;
|
||||
stage_k <= s2_k;
|
||||
stage_Errval <= s2_Errval;
|
||||
stage_context_index <= s2_context_index;
|
||||
stage_strip_last_pixel <= s2_strip_last_pixel;
|
||||
stage_qbpp <= s2_qbpp;
|
||||
stage_LIMIT <= s2_LIMIT;
|
||||
stage_map_invert <= s2_map_invert;
|
||||
end else if (stage_to_prebias) begin
|
||||
stage_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (s1_to_s2) begin
|
||||
s2_valid <= 1'b1;
|
||||
s2_A_accum <= s1_A_accum;
|
||||
s2_B_in <= s1_B_in;
|
||||
s2_B_delta <= B_delta;
|
||||
s2_C_in <= s1_C_in;
|
||||
s2_N_in <= s1_N_in;
|
||||
s2_N_halved_plus_one <= s1_N_halved_plus_one;
|
||||
s2_RESET <= s1_RESET;
|
||||
s2_k <= s1_k;
|
||||
s2_Errval <= s1_Errval;
|
||||
s2_context_index <= s1_context_index;
|
||||
s2_strip_last_pixel <= s1_strip_last_pixel;
|
||||
s2_qbpp <= s1_qbpp;
|
||||
s2_LIMIT <= s1_LIMIT;
|
||||
s2_map_invert <= s1_map_invert;
|
||||
end else if (s2_to_stage) begin
|
||||
s2_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (s0_to_s1) begin
|
||||
s1_valid <= 1'b1;
|
||||
s1_A_accum <= A_accum_next;
|
||||
s1_B_in <= s0_B_in;
|
||||
s1_C_in <= s0_C_in;
|
||||
s1_N_in <= s0_N_in;
|
||||
s1_N_halved_plus_one <= N_halved_plus_one_next;
|
||||
s1_RESET <= s0_RESET;
|
||||
s1_k <= k_next;
|
||||
s1_Errval <= s0_Errval;
|
||||
s1_context_index <= s0_context_index;
|
||||
s1_strip_last_pixel <= s0_strip_last_pixel;
|
||||
s1_qbpp <= s0_qbpp;
|
||||
s1_LIMIT <= s0_LIMIT;
|
||||
s1_map_invert <= map_invert_next;
|
||||
s1_Errval_ext <= Errval_ext;
|
||||
s1_near_scale <= near_scale;
|
||||
end else if (s1_to_s2) begin
|
||||
s1_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (s0_promote_next) begin
|
||||
s0_valid <= 1'b1;
|
||||
s0_A_in <= update_next_A_in;
|
||||
s0_B_in <= update_next_B_in;
|
||||
s0_C_in <= update_next_C_in;
|
||||
s0_N_in <= update_next_N_in;
|
||||
s0_Errval <= update_next_Errval;
|
||||
s0_context_index <= update_next_context_index;
|
||||
s0_strip_last_pixel <= update_next_strip_last_pixel;
|
||||
s0_qbpp <= update_next_qbpp;
|
||||
s0_LIMIT <= update_next_LIMIT;
|
||||
s0_NEAR <= update_next_NEAR;
|
||||
s0_RESET <= update_next_RESET;
|
||||
update_next_valid <= 1'b0;
|
||||
end else if (s0_to_s1) begin
|
||||
s0_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (update_load_input) begin
|
||||
s0_valid <= 1'b1;
|
||||
s0_A_in <= A_in;
|
||||
s0_B_in <= B_in;
|
||||
s0_C_in <= C_in;
|
||||
s0_N_in <= N_in;
|
||||
s0_Errval <= Errval;
|
||||
s0_context_index <= context_index_in;
|
||||
s0_strip_last_pixel <= strip_last_pixel_in;
|
||||
s0_qbpp <= qbpp_in;
|
||||
s0_LIMIT <= LIMIT_in;
|
||||
s0_NEAR <= NEAR;
|
||||
s0_RESET <= RESET;
|
||||
end
|
||||
|
||||
if (update_store_next) begin
|
||||
update_next_valid <= 1'b1;
|
||||
update_next_A_in <= A_in;
|
||||
update_next_B_in <= B_in;
|
||||
update_next_C_in <= C_in;
|
||||
update_next_N_in <= N_in;
|
||||
update_next_Errval <= Errval;
|
||||
update_next_context_index <= context_index_in;
|
||||
update_next_strip_last_pixel <= strip_last_pixel_in;
|
||||
update_next_qbpp <= qbpp_in;
|
||||
update_next_LIMIT <= LIMIT_in;
|
||||
update_next_NEAR <= NEAR;
|
||||
update_next_RESET <= RESET;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
`default_nettype wire
|
||||
144
fpga/verilog/jls_error_mapper.sv
Normal file
144
fpga/verilog/jls_error_mapper.sv
Normal file
@@ -0,0 +1,144 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 prediction error encoding, Annex G.2 variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Map signed Errval into non-negative MErrval
|
||||
// Trace : docs/jls_traceability.md#golomb-rice-encoding
|
||||
// Example : Errval=-3 maps to MErrval=5; Errval=3 maps to MErrval=6.
|
||||
//
|
||||
// Registered error mapper. The upstream regular-mode stage computes Errval,
|
||||
// k, LIMIT, qbpp, and whether the context correction inverts Errval before
|
||||
// mapping. This module only performs the standard signed-to-unsigned mapping
|
||||
// and forwards coding parameters to jls_golomb_encoder.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_error_mapper (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous active-high reset.
  input var logic rst,

  // Upstream prediction-error event is valid this cycle.
  input var logic err_valid,

  // High when the output register is empty or is being drained, i.e. the
  // event presented this cycle will be captured.
  output logic err_ready,

  // Signed prediction error after quantization and context sign.
  input var logic signed [31:0] Errval,

  // When high, Errval is bitwise inverted before the mapping is applied.
  input var logic map_invert,

  // JPEG-LS Golomb parameter k (forwarded unchanged).
  input var logic [4:0] k,

  // JPEG-LS LIMIT parameter for the current coding mode (forwarded unchanged).
  input var logic [6:0] limit,

  // JPEG-LS qbpp parameter for the current coding mode (forwarded unchanged).
  input var logic [4:0] qbpp,

  // Last pixel of the current strip frame (forwarded unchanged).
  input var logic strip_last_pixel,

  // Registered mapped-error event is valid.
  output logic mapped_valid,

  // Downstream stage can accept the registered mapped-error event.
  input var logic mapped_ready,

  // Non-negative mapped error value.
  output logic [31:0] MErrval,

  // Coding parameters registered together with MErrval.
  output logic [4:0] mapped_k,
  output logic [6:0] mapped_limit,
  output logic [4:0] mapped_qbpp,

  // Strip boundary flag registered together with MErrval.
  output logic mapped_strip_last_pixel
);

  // Errval after the optional inversion, and its 33-bit sign extension.
  logic signed [31:0] errval_sel;
  logic signed [32:0] errval_sel_wide;

  // Magnitude of errval_sel_wide and the 33-bit mapped value before
  // truncation to the 32-bit output.
  logic signed [32:0] errval_mag;
  logic        [32:0] merrval_wide;

  // Handshake terms.
  logic out_slot_free;
  logic take_err;

  // Signed-to-unsigned mapping:
  //   value >= 0 ->  2*value
  //   value <  0 -> -2*value - 1
  // Example: -3 maps to 5, 3 maps to 6.
  always_comb begin
    if (map_invert) begin
      errval_sel = ~Errval;
    end else begin
      errval_sel = Errval;
    end

    errval_sel_wide = {errval_sel[31], errval_sel};

    if (errval_sel_wide < 33'sd0) begin
      errval_mag   = -errval_sel_wide;
      merrval_wide = (errval_mag[32:0] << 1) - 33'd1;
    end else begin
      errval_mag   = errval_sel_wide;
      merrval_wide = errval_mag[32:0] << 1;
    end
  end

  // The single output register can be reloaded when it is empty or when the
  // downstream stage is taking its current contents.
  always_comb begin
    if (!mapped_valid || mapped_ready) begin
      out_slot_free = 1'b1;
    end else begin
      out_slot_free = 1'b0;
    end

    err_ready = out_slot_free;

    if (err_valid && out_slot_free) begin
      take_err = 1'b1;
    end else begin
      take_err = 1'b0;
    end
  end

  // Output register. A new event replaces the old one in the same cycle the
  // old one is consumed; otherwise a consumed event just clears mapped_valid.
  always_ff @(posedge clk) begin
    if (rst) begin
      mapped_valid            <= 1'b0;
      MErrval                 <= 32'd0;
      mapped_k                <= 5'd0;
      mapped_limit            <= 7'd0;
      mapped_qbpp             <= 5'd0;
      mapped_strip_last_pixel <= 1'b0;
    end else if (take_err) begin
      mapped_valid            <= 1'b1;
      MErrval                 <= merrval_wide[31:0];
      mapped_k                <= k;
      mapped_limit            <= limit;
      mapped_qbpp             <= qbpp;
      mapped_strip_last_pixel <= strip_last_pixel;
    end else if (mapped_valid && mapped_ready) begin
      mapped_valid <= 1'b0;
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
371
fpga/verilog/jls_golomb_encoder.sv
Normal file
371
fpga/verilog/jls_golomb_encoder.sv
Normal file
@@ -0,0 +1,371 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 prediction error encoding, Annex G.2 variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Encode MErrval using k, LIMIT, and qbpp
|
||||
// Trace : docs/jls_traceability.md#golomb-rice-encoding
|
||||
// Example : MErrval=5, k=1 emits bits 0,0,1,1.
|
||||
//
|
||||
// Golomb code-event generator. This module starts from the standard mapped
|
||||
// error value MErrval and Golomb parameter k. Earlier pipeline stages are
|
||||
// responsible for computing Errval, MErrval, k, LIMIT, and qbpp from the
|
||||
// JPEG-LS context variables.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_golomb_encoder #(
|
||||
// Maximum code bits sent to jls_bit_packer in one event.
|
||||
parameter int MAX_CODE_BITS = 64
|
||||
) (
|
||||
// Main 250 MHz clock.
|
||||
input var logic clk,
|
||||
|
||||
// Synchronous active-high reset.
|
||||
input var logic rst,
|
||||
|
||||
// MErrval input event is valid.
|
||||
input var logic mapped_valid,
|
||||
|
||||
// This encoder can accept MErrval and coding parameters.
|
||||
output logic mapped_ready,
|
||||
|
||||
// JPEG-LS mapped error value, named after the standard pseudocode variable.
|
||||
input var logic [31:0] MErrval,
|
||||
|
||||
// JPEG-LS Golomb parameter k.
|
||||
input var logic [4:0] k,
|
||||
|
||||
// JPEG-LS LIMIT parameter for the current coding mode.
|
||||
input var logic [6:0] limit,
|
||||
|
||||
// JPEG-LS qbpp parameter for the current coding mode.
|
||||
input var logic [4:0] qbpp,
|
||||
|
||||
// Last pixel of the current strip frame.
|
||||
input var logic mapped_strip_last_pixel,
|
||||
|
||||
// Generated left-aligned code event for jls_bit_packer.
|
||||
output logic code_valid,
|
||||
|
||||
// jls_bit_packer can accept the current code event.
|
||||
input var logic code_ready,
|
||||
|
||||
// Left-aligned code bits. The first bit is code_bits[MAX_CODE_BITS-1].
|
||||
output logic [MAX_CODE_BITS-1:0] code_bits,
|
||||
|
||||
// Number of valid bits in code_bits.
|
||||
output logic [6:0] code_bit_count,
|
||||
|
||||
// Encoder completed the current MErrval event.
|
||||
output logic mapped_done,
|
||||
|
||||
// Encoder completed the last pixel of the current strip frame.
|
||||
output logic mapped_last_done
|
||||
);
|
||||
|
||||
// State machine for emitting prefix chunks and optional suffix bits.
|
||||
typedef enum logic [2:0] {
|
||||
STATE_IDLE = 3'd0,
|
||||
STATE_PREP = 3'd1,
|
||||
STATE_SUFFIX_PREP = 3'd2,
|
||||
STATE_PREFIX = 3'd3,
|
||||
STATE_SUFFIX = 3'd4,
|
||||
STATE_DONE = 3'd5,
|
||||
STATE_SELECT = 3'd6
|
||||
} golomb_state_e;
|
||||
|
||||
// Maximum code event size as a runtime-comparable constant.
|
||||
localparam logic [6:0] MAX_CODE_BITS_VALUE = MAX_CODE_BITS;
|
||||
|
||||
// Current state.
|
||||
golomb_state_e state;
|
||||
|
||||
// Latched coding parameters for the active mapped-error event.
|
||||
logic [6:0] prefix_remaining;
|
||||
logic [31:0] suffix_bits;
|
||||
logic [6:0] suffix_bit_count;
|
||||
logic active_strip_last_pixel;
|
||||
logic [31:0] latched_MErrval;
|
||||
logic [31:0] latched_MErrval_minus_one;
|
||||
logic [4:0] latched_k;
|
||||
logic [6:0] latched_limit;
|
||||
logic [4:0] latched_qbpp;
|
||||
logic latched_strip_last_pixel;
|
||||
logic [31:0] suffix_base_bits;
|
||||
logic [31:0] high_bits_latched;
|
||||
logic [6:0] prefix_threshold_latched;
|
||||
logic [6:0] normal_prefix_count_latched;
|
||||
logic [6:0] limited_prefix_count_latched;
|
||||
|
||||
// Pending code-event bookkeeping. Counters are updated when code_ready
|
||||
// accepts the event.
|
||||
logic pending_prefix_event;
|
||||
logic pending_prefix_last;
|
||||
logic pending_suffix_event;
|
||||
logic [6:0] pending_prefix_count;
|
||||
|
||||
// Combinational input analysis.
|
||||
logic accept_mapped;
|
||||
logic [31:0] high_bits;
|
||||
logic [6:0] prefix_threshold;
|
||||
logic use_regular_golomb_path;
|
||||
logic [6:0] normal_prefix_count;
|
||||
logic [6:0] limited_prefix_count;
|
||||
logic [6:0] selected_prefix_count;
|
||||
logic [6:0] selected_suffix_count;
|
||||
logic high_bits_upper_nonzero;
|
||||
logic [31:0] suffix_mask;
|
||||
logic [31:0] selected_suffix_bits;
|
||||
|
||||
// Combinational code-event builders.
|
||||
logic code_slot_open;
|
||||
logic [6:0] prefix_emit_count;
|
||||
logic prefix_emit_is_last;
|
||||
logic [MAX_CODE_BITS-1:0] prefix_event_bits;
|
||||
logic [MAX_CODE_BITS-1:0] suffix_event_bits;
|
||||
|
||||
// Loop index declared outside procedural blocks per project coding style.
|
||||
integer suffix_bit_index;
|
||||
|
||||
always_comb begin
|
||||
mapped_ready = 1'b0;
|
||||
if (state == STATE_IDLE && !code_valid) begin
|
||||
mapped_ready = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
accept_mapped = 1'b0;
|
||||
if (mapped_valid && mapped_ready) begin
|
||||
accept_mapped = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
code_slot_open = 1'b0;
|
||||
if (!code_valid || code_ready) begin
|
||||
code_slot_open = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
high_bits = latched_MErrval >> latched_k;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
prefix_threshold = 7'd0;
|
||||
if (latched_limit > ({2'b00, latched_qbpp} + 7'd1)) begin
|
||||
prefix_threshold = latched_limit - {2'b00, latched_qbpp} - 7'd1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
high_bits_upper_nonzero = 1'b0;
|
||||
if (high_bits_latched[31:7] != 25'd0) begin
|
||||
high_bits_upper_nonzero = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
use_regular_golomb_path = 1'b0;
|
||||
if (!high_bits_upper_nonzero &&
|
||||
high_bits_latched[6:0] < prefix_threshold_latched) begin
|
||||
use_regular_golomb_path = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
normal_prefix_count = high_bits[6:0] + 7'd1;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
limited_prefix_count = 7'd1;
|
||||
if (latched_limit > {2'b00, latched_qbpp}) begin
|
||||
limited_prefix_count = latched_limit - {2'b00, latched_qbpp};
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
selected_prefix_count = limited_prefix_count_latched;
|
||||
selected_suffix_count = {2'b00, latched_qbpp};
|
||||
if (use_regular_golomb_path) begin
|
||||
selected_prefix_count = normal_prefix_count_latched;
|
||||
selected_suffix_count = {2'b00, latched_k};
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
suffix_mask = 32'd0;
|
||||
if (suffix_bit_count != 7'd0) begin
|
||||
suffix_mask = (32'd1 << suffix_bit_count[4:0]) - 32'd1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
selected_suffix_bits = suffix_base_bits & suffix_mask;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
prefix_emit_count = prefix_remaining;
|
||||
prefix_emit_is_last = 1'b1;
|
||||
if (prefix_remaining > MAX_CODE_BITS_VALUE) begin
|
||||
prefix_emit_count = MAX_CODE_BITS_VALUE;
|
||||
prefix_emit_is_last = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
prefix_event_bits = {MAX_CODE_BITS{1'b0}};
|
||||
if (prefix_emit_is_last && prefix_emit_count != 7'd0) begin
|
||||
prefix_event_bits[MAX_CODE_BITS_VALUE - prefix_emit_count] = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
suffix_event_bits = {MAX_CODE_BITS{1'b0}};
|
||||
for (suffix_bit_index = 0; suffix_bit_index < MAX_CODE_BITS; suffix_bit_index = suffix_bit_index + 1) begin
|
||||
if (suffix_bit_index < suffix_bit_count) begin
|
||||
suffix_event_bits[MAX_CODE_BITS - 1 - suffix_bit_index] =
|
||||
suffix_bits[suffix_bit_count - 7'd1 - suffix_bit_index[6:0]];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst) begin
|
||||
state <= STATE_IDLE;
|
||||
prefix_remaining <= 7'd0;
|
||||
suffix_bits <= 32'd0;
|
||||
suffix_bit_count <= 7'd0;
|
||||
active_strip_last_pixel <= 1'b0;
|
||||
latched_MErrval <= 32'd0;
|
||||
latched_MErrval_minus_one <= 32'd0;
|
||||
latched_k <= 5'd0;
|
||||
latched_limit <= 7'd0;
|
||||
latched_qbpp <= 5'd0;
|
||||
latched_strip_last_pixel <= 1'b0;
|
||||
suffix_base_bits <= 32'd0;
|
||||
high_bits_latched <= 32'd0;
|
||||
prefix_threshold_latched <= 7'd0;
|
||||
normal_prefix_count_latched <= 7'd0;
|
||||
limited_prefix_count_latched <= 7'd0;
|
||||
pending_prefix_event <= 1'b0;
|
||||
pending_prefix_last <= 1'b0;
|
||||
pending_suffix_event <= 1'b0;
|
||||
pending_prefix_count <= 7'd0;
|
||||
code_valid <= 1'b0;
|
||||
code_bits <= {MAX_CODE_BITS{1'b0}};
|
||||
code_bit_count <= 7'd0;
|
||||
mapped_done <= 1'b0;
|
||||
mapped_last_done <= 1'b0;
|
||||
end else begin
|
||||
mapped_done <= 1'b0;
|
||||
mapped_last_done <= 1'b0;
|
||||
|
||||
if (code_valid && code_ready) begin
|
||||
code_valid <= 1'b0;
|
||||
code_bits <= {MAX_CODE_BITS{1'b0}};
|
||||
code_bit_count <= 7'd0;
|
||||
|
||||
if (pending_prefix_event) begin
|
||||
prefix_remaining <= prefix_remaining - pending_prefix_count;
|
||||
if (pending_prefix_last) begin
|
||||
if (suffix_bit_count != 7'd0) begin
|
||||
state <= STATE_SUFFIX;
|
||||
end else begin
|
||||
state <= STATE_DONE;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if (pending_suffix_event) begin
|
||||
state <= STATE_DONE;
|
||||
end
|
||||
|
||||
pending_prefix_event <= 1'b0;
|
||||
pending_prefix_last <= 1'b0;
|
||||
pending_suffix_event <= 1'b0;
|
||||
pending_prefix_count <= 7'd0;
|
||||
end
|
||||
|
||||
if (accept_mapped) begin
|
||||
latched_MErrval <= MErrval;
|
||||
latched_MErrval_minus_one <= MErrval - 32'd1;
|
||||
latched_k <= k;
|
||||
latched_limit <= limit;
|
||||
latched_qbpp <= qbpp;
|
||||
latched_strip_last_pixel <= mapped_strip_last_pixel;
|
||||
state <= STATE_PREP;
|
||||
end else if (code_slot_open && !code_valid) begin
|
||||
case (state)
|
||||
STATE_PREP: begin
|
||||
// Stage note : The standard Annex G.2 Golomb selection is
|
||||
// split across STATE_PREP and STATE_SELECT. This stage registers
|
||||
// MErrval>>k, LIMIT/qbpp threshold, and candidate prefix lengths
|
||||
// before the compare/mux that writes prefix_remaining.
|
||||
high_bits_latched <= high_bits;
|
||||
prefix_threshold_latched <= prefix_threshold;
|
||||
normal_prefix_count_latched <= normal_prefix_count;
|
||||
limited_prefix_count_latched <= limited_prefix_count;
|
||||
active_strip_last_pixel <= latched_strip_last_pixel;
|
||||
state <= STATE_SELECT;
|
||||
end
|
||||
|
||||
STATE_SELECT: begin
|
||||
// Stage note : Use the registered high_bits value. The standard
|
||||
// condition high_bits < LIMIT-qbpp-1 is implemented as an upper-bit
|
||||
// zero test plus a 7-bit compare, avoiding a long 32-bit carry path.
|
||||
prefix_remaining <= selected_prefix_count;
|
||||
suffix_bit_count <= selected_suffix_count;
|
||||
suffix_base_bits <= latched_MErrval_minus_one;
|
||||
if (use_regular_golomb_path) begin
|
||||
suffix_base_bits <= latched_MErrval;
|
||||
end
|
||||
state <= STATE_SUFFIX_PREP;
|
||||
end
|
||||
|
||||
STATE_SUFFIX_PREP: begin
|
||||
suffix_bits <= selected_suffix_bits;
|
||||
state <= STATE_PREFIX;
|
||||
end
|
||||
|
||||
STATE_PREFIX: begin
|
||||
if (prefix_remaining != 7'd0) begin
|
||||
code_valid <= 1'b1;
|
||||
code_bits <= prefix_event_bits;
|
||||
code_bit_count <= prefix_emit_count;
|
||||
pending_prefix_event <= 1'b1;
|
||||
pending_prefix_last <= prefix_emit_is_last;
|
||||
pending_prefix_count <= prefix_emit_count;
|
||||
end
|
||||
end
|
||||
|
||||
STATE_SUFFIX: begin
|
||||
if (suffix_bit_count != 7'd0) begin
|
||||
code_valid <= 1'b1;
|
||||
code_bits <= suffix_event_bits;
|
||||
code_bit_count <= suffix_bit_count;
|
||||
pending_suffix_event <= 1'b1;
|
||||
end else begin
|
||||
state <= STATE_DONE;
|
||||
end
|
||||
end
|
||||
|
||||
STATE_DONE: begin
|
||||
mapped_done <= 1'b1;
|
||||
mapped_last_done <= active_strip_last_pixel;
|
||||
active_strip_last_pixel <= 1'b0;
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
|
||||
default: begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
`default_nettype wire
|
||||
442
fpga/verilog/jls_header_writer.sv
Normal file
442
fpga/verilog/jls_header_writer.sv
Normal file
@@ -0,0 +1,442 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.2.2 frame header, C.2.3 scan header, C.2.4.1 LSE
|
||||
// Figure : N/A
|
||||
// Table : Table C.1 preset parameters, Table C.2 RESET, Table C.3 defaults
|
||||
// Pseudocode : JPEG-LS marker segment emission before and after one scan
|
||||
// Trace : docs/jls_traceability.md#jls-header-markers
|
||||
// Example : For PIX_WIDTH=8 and width=32, SOF55 emits P=8, Y=16, X=32.
|
||||
//
|
||||
// Header writer for one standalone grayscale JPEG-LS strip frame. A strip
|
||||
// start command emits SOI, SOF55, LSE preset coding parameters, and SOS. A
|
||||
// strip finish command emits EOI after the entropy payload has been flushed by
|
||||
// the bit packer.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
// Emits the marker bytes that surround one grayscale JPEG-LS strip frame.
//
// A start command (strip_start_valid/strip_start_ready) latches the frame
// parameters and streams 40 bytes: SOI(2) + SOF55(13) + LSE(15) + SOS(10).
// A finish command (strip_finish_valid/strip_finish_ready) streams FF D9.
// Bytes leave through a registered byte_valid/byte_ready handshake; a byte is
// held until byte_ready accepts it. When both commands are presented in the
// same idle cycle the start command wins, because strip_finish_ready is
// withheld while strip_start_valid is high.
module jls_header_writer #(
    // Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
    parameter int PIX_WIDTH = 16
) (
    // Main 250 MHz clock.
    input var logic clk,

    // Synchronous active-high reset.
    input var logic rst,

    // Start command for one standalone strip frame.
    input var logic strip_start_valid,

    // High when a strip start command can be accepted (writer is idle).
    output logic strip_start_ready,

    // Marks the first strip of an original input image for ofifo_wdata[8].
    input var logic original_image_first_strip,

    // JPEG-LS frame width written to SOF55.X.
    input var logic [12:0] strip_width,

    // JPEG-LS frame height written to SOF55.Y.
    input var logic [12:0] strip_height,

    // NEAR parameter written to the JPEG-LS SOS segment.
    input var logic [5:0] near,

    // JPEG-LS LSE MAXVAL preset coding parameter.
    input var logic [15:0] preset_maxval,

    // JPEG-LS LSE T1 preset coding parameter.
    input var logic [15:0] preset_t1,

    // JPEG-LS LSE T2 preset coding parameter.
    input var logic [15:0] preset_t2,

    // JPEG-LS LSE T3 preset coding parameter.
    input var logic [15:0] preset_t3,

    // JPEG-LS LSE RESET preset coding parameter.
    input var logic [15:0] preset_reset,

    // Finish command after the strip entropy payload has been byte-flushed.
    input var logic strip_finish_valid,

    // High when a strip finish command can be accepted (idle, no start pending).
    output logic strip_finish_ready,

    // Encoded marker byte is valid.
    output logic byte_valid,

    // Downstream byte buffer can accept the marker byte.
    input var logic byte_ready,

    // Encoded marker byte in JPEG marker-stream order.
    output logic [7:0] byte_data,

    // Sideband for the first SOI byte of an original input image. It is high
    // only while header byte 0 is being offered.
    output logic original_image_start,

    // Header completion pulse, registered: high for one cycle after the last
    // SOS byte is accepted.
    output logic header_done,

    // EOI completion pulse, registered: high for one cycle after the second
    // EOI byte is accepted.
    output logic eoi_done
);

  import jls_common_pkg::*;

  // Header stream is SOI(2) + SOF55(13) + LSE(15) + SOS(10).
  localparam logic [5:0] HEADER_LAST_INDEX = 6'd39;

  // EOI stream is FF D9.
  localparam logic [1:0] EOI_LAST_INDEX = 2'd1;

  // State machine split keeps the command path shallow and the byte lookup
  // separate from state update.
  typedef enum logic [1:0] {
    STATE_IDLE   = 2'd0,
    STATE_HEADER = 2'd1,
    STATE_EOI    = 2'd2
  } header_state_e;

  // Current and next state for marker emission.
  header_state_e state;
  header_state_e state_next;

  // Index of the byte currently offered inside the header / EOI sequences.
  logic [5:0] header_index;
  logic [5:0] header_index_next;
  logic [1:0] eoi_index;
  logic [1:0] eoi_index_next;

  // Command fields latched at accept_start and used for header bytes 1..39.
  // original_image_first_strip is deliberately not latched: it only affects
  // header byte 0, which is loaded in the accept cycle straight from the port.
  logic [12:0] latched_strip_width;
  logic [12:0] latched_strip_height;
  logic [5:0]  latched_near;
  logic [15:0] latched_preset_maxval;
  logic [15:0] latched_preset_t1;
  logic [15:0] latched_preset_t2;
  logic [15:0] latched_preset_t3;
  logic [15:0] latched_preset_reset;

  // Byte-wide views of the latched command fields.
  logic [7:0] strip_width_hi;
  logic [7:0] strip_width_lo;
  logic [7:0] strip_height_hi;
  logic [7:0] strip_height_lo;
  logic [7:0] near_byte;
  logic [7:0] preset_maxval_hi;
  logic [7:0] preset_maxval_lo;
  logic [7:0] preset_t1_hi;
  logic [7:0] preset_t1_lo;
  logic [7:0] preset_t2_hi;
  logic [7:0] preset_t2_lo;
  logic [7:0] preset_t3_hi;
  logic [7:0] preset_t3_lo;
  logic [7:0] preset_reset_hi;
  logic [7:0] preset_reset_lo;

  // Output and handshake next-state signals.
  logic       accept_start;
  logic       accept_finish;
  logic       output_fire;
  logic       byte_valid_next;
  logic [7:0] byte_data_next;
  logic       original_image_start_next;
  logic [5:0] header_byte_index;
  logic [1:0] eoi_byte_index;
  logic [7:0] header_byte;
  logic [7:0] eoi_byte;

  // Zero-extend the 13-bit dimensions and 6-bit NEAR to whole bytes.
  always_comb begin
    strip_width_hi  = {3'b000, latched_strip_width[12:8]};
    strip_width_lo  = latched_strip_width[7:0];
    strip_height_hi = {3'b000, latched_strip_height[12:8]};
    strip_height_lo = latched_strip_height[7:0];
    near_byte       = {2'b00, latched_near};
  end

  // Split the 16-bit LSE preset parameters into big-endian byte pairs.
  always_comb begin
    preset_maxval_hi = latched_preset_maxval[15:8];
    preset_maxval_lo = latched_preset_maxval[7:0];
    preset_t1_hi     = latched_preset_t1[15:8];
    preset_t1_lo     = latched_preset_t1[7:0];
    preset_t2_hi     = latched_preset_t2[15:8];
    preset_t2_lo     = latched_preset_t2[7:0];
    preset_t3_hi     = latched_preset_t3[15:8];
    preset_t3_lo     = latched_preset_t3[7:0];
    preset_reset_hi  = latched_preset_reset[15:8];
    preset_reset_lo  = latched_preset_reset[7:0];
  end

  // Commands are only taken in STATE_IDLE; a pending start masks finish.
  always_comb begin
    strip_start_ready  = 1'b0;
    strip_finish_ready = 1'b0;
    if (state == STATE_IDLE) begin
      strip_start_ready = 1'b1;
      if (!strip_start_valid) begin
        strip_finish_ready = 1'b1;
      end
    end
  end

  always_comb begin
    accept_start = 1'b0;
    if (strip_start_valid && strip_start_ready) begin
      accept_start = 1'b1;
    end
  end

  always_comb begin
    accept_finish = 1'b0;
    if (strip_finish_valid && strip_finish_ready) begin
      accept_finish = 1'b1;
    end
  end

  // The currently offered byte is accepted downstream this cycle.
  always_comb begin
    output_fire = 1'b0;
    if (byte_valid && byte_ready) begin
      output_fire = 1'b1;
    end
  end

  // Index of the header byte to load into byte_data next: 0 on a start
  // command, otherwise the successor of the byte being accepted.
  always_comb begin
    header_byte_index = header_index;
    if (state == STATE_IDLE && accept_start) begin
      header_byte_index = 6'd0;
    end else if (state == STATE_HEADER && output_fire && header_index != HEADER_LAST_INDEX) begin
      header_byte_index = header_index + 6'd1;
    end
  end

  // Same look-ahead for the two-byte EOI sequence.
  always_comb begin
    eoi_byte_index = eoi_index;
    if (state == STATE_IDLE && accept_finish) begin
      eoi_byte_index = 2'd0;
    end else if (state == STATE_EOI && output_fire && eoi_index != EOI_LAST_INDEX) begin
      eoi_byte_index = eoi_index + 2'd1;
    end
  end

  // Header byte lookup, indexed by position in the 40-byte stream.
  always_comb begin
    header_byte = 8'h00;
    case (header_byte_index)
      // SOI: T.87 C.1 uses JPEG marker syntax from T.81 B.1.
      6'd0:  header_byte = JLS_MARKER_PREFIX;
      6'd1:  header_byte = JLS_MARKER_SOI;

      // SOF55: T.87 C.2.2, one grayscale component.
      6'd2:  header_byte = JLS_MARKER_PREFIX;
      6'd3:  header_byte = JLS_MARKER_SOF55;
      6'd4:  header_byte = 8'h00;             // segment length high byte
      6'd5:  header_byte = 8'h0B;             // segment length low byte (11)
      6'd6:  header_byte = PIX_WIDTH[7:0];    // P
      6'd7:  header_byte = strip_height_hi;   // Y
      6'd8:  header_byte = strip_height_lo;
      6'd9:  header_byte = strip_width_hi;    // X
      6'd10: header_byte = strip_width_lo;
      6'd11: header_byte = 8'h01;
      6'd12: header_byte = 8'h01;
      6'd13: header_byte = 8'h11;
      6'd14: header_byte = 8'h00;

      // LSE preset coding parameters: T.87 C.2.4.1.1, type 1.
      6'd15: header_byte = JLS_MARKER_PREFIX;
      6'd16: header_byte = JLS_MARKER_LSE;
      6'd17: header_byte = 8'h00;             // segment length high byte
      6'd18: header_byte = 8'h0D;             // segment length low byte (13)
      6'd19: header_byte = 8'h01;
      6'd20: header_byte = preset_maxval_hi;
      6'd21: header_byte = preset_maxval_lo;
      6'd22: header_byte = preset_t1_hi;
      6'd23: header_byte = preset_t1_lo;
      6'd24: header_byte = preset_t2_hi;
      6'd25: header_byte = preset_t2_lo;
      6'd26: header_byte = preset_t3_hi;
      6'd27: header_byte = preset_t3_lo;
      6'd28: header_byte = preset_reset_hi;
      6'd29: header_byte = preset_reset_lo;

      // SOS: T.87 C.2.3, one component, no mapping table, ILV=0.
      6'd30: header_byte = JLS_MARKER_PREFIX;
      6'd31: header_byte = JLS_MARKER_SOS;
      6'd32: header_byte = 8'h00;             // segment length high byte
      6'd33: header_byte = 8'h08;             // segment length low byte (8)
      6'd34: header_byte = 8'h01;
      6'd35: header_byte = 8'h01;
      6'd36: header_byte = 8'h00;
      6'd37: header_byte = near_byte;
      6'd38: header_byte = 8'h00;
      6'd39: header_byte = 8'h00;
      default: header_byte = 8'h00;
    endcase
  end

  always_comb begin
    eoi_byte = 8'h00;
    case (eoi_byte_index)
      2'd0: eoi_byte = JLS_MARKER_PREFIX;
      2'd1: eoi_byte = JLS_MARKER_EOI;
      default: eoi_byte = 8'h00;
    endcase
  end

  // Next state and byte counters.
  always_comb begin
    state_next        = state;
    header_index_next = header_index;
    eoi_index_next    = eoi_index;

    case (state)
      STATE_IDLE: begin
        header_index_next = 6'd0;
        eoi_index_next    = 2'd0;
        if (accept_start) begin
          state_next = STATE_HEADER;
        end else if (accept_finish) begin
          state_next = STATE_EOI;
        end
      end

      STATE_HEADER: begin
        if (output_fire) begin
          if (header_index == HEADER_LAST_INDEX) begin
            header_index_next = 6'd0;
            state_next        = STATE_IDLE;
          end else begin
            header_index_next = header_index + 6'd1;
          end
        end
      end

      STATE_EOI: begin
        if (output_fire) begin
          if (eoi_index == EOI_LAST_INDEX) begin
            eoi_index_next = 2'd0;
            state_next     = STATE_IDLE;
          end else begin
            eoi_index_next = eoi_index + 2'd1;
          end
        end
      end

      default: begin
        state_next        = STATE_IDLE;
        header_index_next = 6'd0;
        eoi_index_next    = 2'd0;
      end
    endcase
  end

  // Next value of the registered output byte and its sideband. Outputs hold
  // their value while a byte is waiting for byte_ready.
  always_comb begin
    byte_valid_next           = byte_valid;
    byte_data_next            = byte_data;
    original_image_start_next = original_image_start;

    case (state)
      STATE_IDLE: begin
        byte_valid_next           = 1'b0;
        byte_data_next            = 8'h00;
        original_image_start_next = 1'b0;
        if (accept_start) begin
          // Header byte 0 (first SOI byte) is the only byte that can carry
          // the original-image sideband.
          byte_valid_next           = 1'b1;
          byte_data_next            = header_byte;
          original_image_start_next = original_image_first_strip;
        end else if (accept_finish) begin
          byte_valid_next = 1'b1;
          byte_data_next  = eoi_byte;
        end
      end

      STATE_HEADER: begin
        if (output_fire) begin
          // Every byte loaded here has index >= 1, so the sideband is always
          // dropped once byte 0 has been accepted.
          original_image_start_next = 1'b0;
          if (header_index == HEADER_LAST_INDEX) begin
            byte_valid_next = 1'b0;
            byte_data_next  = 8'h00;
          end else begin
            byte_valid_next = 1'b1;
            byte_data_next  = header_byte;
          end
        end
      end

      STATE_EOI: begin
        if (output_fire) begin
          if (eoi_index == EOI_LAST_INDEX) begin
            byte_valid_next = 1'b0;
            byte_data_next  = 8'h00;
          end else begin
            byte_valid_next = 1'b1;
            byte_data_next  = eoi_byte;
          end
          original_image_start_next = 1'b0;
        end
      end

      default: begin
        byte_valid_next           = 1'b0;
        byte_data_next            = 8'h00;
        original_image_start_next = 1'b0;
      end
    endcase
  end

  always_ff @(posedge clk) begin
    if (rst) begin
      state                 <= STATE_IDLE;
      header_index          <= 6'd0;
      eoi_index             <= 2'd0;
      latched_strip_width   <= 13'd0;
      latched_strip_height  <= 13'd0;
      latched_near          <= 6'd0;
      latched_preset_maxval <= 16'd0;
      latched_preset_t1     <= 16'd0;
      latched_preset_t2     <= 16'd0;
      latched_preset_t3     <= 16'd0;
      latched_preset_reset  <= 16'd0;
      byte_valid            <= 1'b0;
      byte_data             <= 8'h00;
      original_image_start  <= 1'b0;
      header_done           <= 1'b0;
      eoi_done              <= 1'b0;
    end else begin
      // Completion strobes default low so they pulse for a single cycle.
      header_done <= 1'b0;
      eoi_done    <= 1'b0;

      state                <= state_next;
      header_index         <= header_index_next;
      eoi_index            <= eoi_index_next;
      byte_valid           <= byte_valid_next;
      byte_data            <= byte_data_next;
      original_image_start <= original_image_start_next;

      if (state == STATE_HEADER && output_fire && header_index == HEADER_LAST_INDEX) begin
        header_done <= 1'b1;
      end

      if (state == STATE_EOI && output_fire && eoi_index == EOI_LAST_INDEX) begin
        eoi_done <= 1'b1;
      end

      // Freeze the frame parameters for the remaining header bytes.
      if (accept_start) begin
        latched_strip_width   <= strip_width;
        latched_strip_height  <= strip_height;
        latched_near          <= near;
        latched_preset_maxval <= preset_maxval;
        latched_preset_t1     <= preset_t1;
        latched_preset_t2     <= preset_t2;
        latched_preset_t3     <= preset_t3;
        latched_preset_reset  <= preset_reset;
      end
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
354
fpga/verilog/jls_input_ctrl.sv
Normal file
354
fpga/verilog/jls_input_ctrl.sv
Normal file
@@ -0,0 +1,354 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.8 Control procedure, Annex D.1-D.3 scan control
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Source image sample ordering before JPEG-LS encoding
|
||||
// Example : See docs/jls_module_interfaces.md
|
||||
//
|
||||
// Input controller for the JPEG-LS encoder. This module aligns the standard
|
||||
// synchronous FIFO read latency, waits for the original-image SOF sideband,
|
||||
// samples runtime configuration at the SOF pixel, and emits one registered
|
||||
// pixel event at a time for the downstream scan controller.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_input_ctrl #(
|
||||
// Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
|
||||
parameter int PIX_WIDTH = 16,
|
||||
|
||||
// Default image width used when runtime configuration is invalid.
|
||||
parameter int DEFAULT_PIC_COL = 6144,
|
||||
|
||||
// Default image height used when runtime configuration is invalid.
|
||||
parameter int DEFAULT_PIC_ROW = 256,
|
||||
|
||||
// Maximum supported runtime image width.
|
||||
parameter int MAX_PIC_COL = 6144,
|
||||
|
||||
// Maximum supported runtime image height.
|
||||
parameter int MAX_PIC_ROW = 4096,
|
||||
|
||||
// Number of original-image rows in one standalone JPEG-LS strip frame.
|
||||
parameter int SCAN_ROWS = 16,
|
||||
|
||||
// Packed input FIFO width: one SOF sideband bit per input byte lane.
|
||||
parameter int IFIFO_DATA_WIDTH = ((PIX_WIDTH + 7) / 8) * 9
|
||||
) (
|
||||
// Main 250 MHz clock.
|
||||
input var logic clk,
|
||||
|
||||
// Synchronous active-high reset.
|
||||
input var logic rst,
|
||||
|
||||
// Runtime image width sampled when the first SOF pixel is accepted.
|
||||
input var logic [12:0] cfg_pic_col,
|
||||
|
||||
// Runtime image height sampled when the first SOF pixel is accepted.
|
||||
input var logic [12:0] cfg_pic_row,
|
||||
|
||||
// Runtime compression-ratio selector sampled when the first SOF pixel is accepted.
|
||||
input var logic [3:0] ratio,
|
||||
|
||||
// Input FIFO read clock, same frequency and phase as clk.
|
||||
output logic ififo_rclk,
|
||||
|
||||
// Input FIFO read request. FIFO data is valid on the next clk cycle.
|
||||
output logic ififo_rd,
|
||||
|
||||
// Packed SOF flag and grayscale sample from the input FIFO.
|
||||
input var logic [IFIFO_DATA_WIDTH-1:0] ififo_rdata,
|
||||
|
||||
// Input FIFO empty flag.
|
||||
input var logic ififo_empty,
|
||||
|
||||
// Input FIFO almost-empty flag used for read optimization.
|
||||
input var logic ififo_alempty,
|
||||
|
||||
// Downstream stage can accept the current pixel event.
|
||||
input var logic pixel_ready,
|
||||
|
||||
// Pause request from downstream or output-buffer flow control.
|
||||
input var logic pause_req,
|
||||
|
||||
// Pixel event valid flag.
|
||||
output logic pixel_valid,
|
||||
|
||||
// SOF sideband copied from the accepted FIFO word.
|
||||
output logic pixel_sof,
|
||||
|
||||
// Grayscale sample value copied from the accepted FIFO word.
|
||||
output logic [PIX_WIDTH-1:0] pixel_sample,
|
||||
|
||||
// Zero-based column coordinate in the original input image.
|
||||
output logic [12:0] pixel_x,
|
||||
|
||||
// Zero-based row coordinate in the original input image.
|
||||
output logic [12:0] pixel_y,
|
||||
|
||||
// First pixel of the current standalone strip frame.
|
||||
output logic strip_first_pixel,
|
||||
|
||||
// Last pixel of the current standalone strip frame.
|
||||
output logic strip_last_pixel,
|
||||
|
||||
// First pixel of the current original input image.
|
||||
output logic image_first_pixel,
|
||||
|
||||
// Last pixel of the current original input image.
|
||||
output logic image_last_pixel,
|
||||
|
||||
// Effective image width after runtime validation and fallback.
|
||||
output logic [12:0] active_pic_col,
|
||||
|
||||
// Effective image height after runtime validation and fallback.
|
||||
output logic [12:0] active_pic_row,
|
||||
|
||||
// Latched ratio for the current original input image.
|
||||
output logic [3:0] active_ratio,
|
||||
|
||||
// High when cfg_pic_col/cfg_pic_row passed validation at SOF.
|
||||
output logic active_cfg_valid,
|
||||
|
||||
// High while this controller is accepting pixels for an original image.
|
||||
output logic image_active
|
||||
);
|
||||
|
||||
localparam int SOF_BIT_INDEX = (PIX_WIDTH == 8) ? 8 : 17;
|
||||
|
||||
// Constant forms used in comparisons and registered outputs.
|
||||
localparam logic [12:0] MIN_PIC_COL_VALUE = 13'd16;
|
||||
localparam logic [12:0] MIN_PIC_ROW_VALUE = 13'd16;
|
||||
localparam logic [12:0] DEFAULT_PIC_COL_VALUE = DEFAULT_PIC_COL;
|
||||
localparam logic [12:0] DEFAULT_PIC_ROW_VALUE = DEFAULT_PIC_ROW;
|
||||
localparam logic [12:0] MAX_PIC_COL_VALUE = MAX_PIC_COL;
|
||||
localparam logic [12:0] MAX_PIC_ROW_VALUE = MAX_PIC_ROW;
|
||||
localparam logic [12:0] SCAN_ROWS_VALUE = SCAN_ROWS;
|
||||
localparam logic [12:0] SCAN_ROWS_LAST_VALUE = SCAN_ROWS - 1;
|
||||
|
||||
// One-cycle read latency marker for the synchronous input FIFO.
|
||||
logic rd_pending;
|
||||
|
||||
// Current coordinate for the next accepted in-frame FIFO word.
|
||||
logic [12:0] x_count;
|
||||
logic [12:0] y_count;
|
||||
logic [12:0] strip_row_count;
|
||||
|
||||
// Runtime configuration validation signals. They are sampled only at SOF.
|
||||
logic cfg_col_in_range;
|
||||
logic cfg_row_in_range;
|
||||
logic cfg_row_aligned;
|
||||
logic cfg_dimension_valid;
|
||||
logic [12:0] cfg_pic_col_selected;
|
||||
logic [12:0] cfg_pic_row_selected;
|
||||
|
||||
// FIFO read control signals.
|
||||
logic fifo_read_allowed;
|
||||
logic pixel_slot_open;
|
||||
logic issue_read;
|
||||
|
||||
// Decoded fields from the FIFO data word.
|
||||
logic fifo_word_sof;
|
||||
logic [PIX_WIDTH-1:0] fifo_word_sample;
|
||||
|
||||
// Coordinate boundary signals for the next accepted in-frame pixel.
|
||||
logic [12:0] active_pic_col_last;
|
||||
logic [12:0] active_pic_row_last;
|
||||
logic x_is_first;
|
||||
logic x_is_last;
|
||||
logic strip_row_is_first;
|
||||
logic y_is_last;
|
||||
logic strip_row_is_last;
|
||||
logic strip_first_active_pixel;
|
||||
|
||||
assign ififo_rclk = clk;
|
||||
assign ififo_rd = issue_read;
|
||||
assign fifo_word_sof = ififo_rdata[SOF_BIT_INDEX];
|
||||
assign fifo_word_sample = ififo_rdata[PIX_WIDTH-1:0];
|
||||
|
||||
always_comb begin
|
||||
cfg_col_in_range = 1'b0;
|
||||
if (cfg_pic_col >= MIN_PIC_COL_VALUE && cfg_pic_col <= MAX_PIC_COL_VALUE) begin
|
||||
cfg_col_in_range = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
cfg_row_in_range = 1'b0;
|
||||
if (cfg_pic_row >= MIN_PIC_ROW_VALUE && cfg_pic_row <= MAX_PIC_ROW_VALUE) begin
|
||||
cfg_row_in_range = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
cfg_row_aligned = 1'b0;
|
||||
if ((cfg_pic_row % SCAN_ROWS_VALUE) == 13'd0) begin
|
||||
cfg_row_aligned = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
cfg_dimension_valid = 1'b0;
|
||||
if (cfg_col_in_range && cfg_row_in_range && cfg_row_aligned) begin
|
||||
cfg_dimension_valid = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
cfg_pic_col_selected = DEFAULT_PIC_COL_VALUE;
|
||||
cfg_pic_row_selected = DEFAULT_PIC_ROW_VALUE;
|
||||
if (cfg_dimension_valid) begin
|
||||
cfg_pic_col_selected = cfg_pic_col;
|
||||
cfg_pic_row_selected = cfg_pic_row;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
fifo_read_allowed = 1'b0;
|
||||
if (!ififo_alempty || !ififo_empty) begin
|
||||
fifo_read_allowed = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
pixel_slot_open = 1'b0;
|
||||
if (!pixel_valid || pixel_ready) begin
|
||||
pixel_slot_open = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
issue_read = 1'b0;
|
||||
if (fifo_read_allowed && pixel_slot_open && !rd_pending && !pause_req) begin
|
||||
issue_read = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
active_pic_col_last = active_pic_col - 13'd1;
|
||||
active_pic_row_last = active_pic_row - 13'd1;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
x_is_first = 1'b0;
|
||||
if (x_count == 13'd0) begin
|
||||
x_is_first = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
x_is_last = 1'b0;
|
||||
if (x_count == active_pic_col_last) begin
|
||||
x_is_last = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
strip_row_is_first = 1'b0;
|
||||
if (strip_row_count == 13'd0) begin
|
||||
strip_row_is_first = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
y_is_last = 1'b0;
|
||||
if (y_count == active_pic_row_last) begin
|
||||
y_is_last = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
strip_row_is_last = 1'b0;
|
||||
if (strip_row_count == SCAN_ROWS_LAST_VALUE) begin
|
||||
strip_row_is_last = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
strip_first_active_pixel = 1'b0;
|
||||
if (x_is_first && strip_row_is_first) begin
|
||||
strip_first_active_pixel = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst) begin
|
||||
rd_pending <= 1'b0;
|
||||
pixel_valid <= 1'b0;
|
||||
pixel_sof <= 1'b0;
|
||||
pixel_sample <= {PIX_WIDTH{1'b0}};
|
||||
pixel_x <= 13'd0;
|
||||
pixel_y <= 13'd0;
|
||||
strip_first_pixel <= 1'b0;
|
||||
strip_last_pixel <= 1'b0;
|
||||
image_first_pixel <= 1'b0;
|
||||
image_last_pixel <= 1'b0;
|
||||
active_pic_col <= DEFAULT_PIC_COL_VALUE;
|
||||
active_pic_row <= DEFAULT_PIC_ROW_VALUE;
|
||||
active_ratio <= 4'd0;
|
||||
active_cfg_valid <= 1'b0;
|
||||
image_active <= 1'b0;
|
||||
x_count <= 13'd0;
|
||||
y_count <= 13'd0;
|
||||
strip_row_count <= 13'd0;
|
||||
end else begin
|
||||
rd_pending <= issue_read;
|
||||
|
||||
if (pixel_valid && pixel_ready) begin
|
||||
pixel_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (rd_pending) begin
|
||||
if (!image_active && fifo_word_sof) begin
|
||||
pixel_valid <= 1'b1;
|
||||
pixel_sof <= fifo_word_sof;
|
||||
pixel_sample <= fifo_word_sample;
|
||||
pixel_x <= 13'd0;
|
||||
pixel_y <= 13'd0;
|
||||
strip_first_pixel <= 1'b1;
|
||||
strip_last_pixel <= 1'b0;
|
||||
image_first_pixel <= 1'b1;
|
||||
image_last_pixel <= 1'b0;
|
||||
active_pic_col <= cfg_pic_col_selected;
|
||||
active_pic_row <= cfg_pic_row_selected;
|
||||
active_ratio <= ratio;
|
||||
active_cfg_valid <= cfg_dimension_valid;
|
||||
image_active <= 1'b1;
|
||||
x_count <= 13'd1;
|
||||
y_count <= 13'd0;
|
||||
strip_row_count <= 13'd0;
|
||||
end else if (image_active) begin
|
||||
pixel_valid <= 1'b1;
|
||||
pixel_sof <= fifo_word_sof;
|
||||
pixel_sample <= fifo_word_sample;
|
||||
pixel_x <= x_count;
|
||||
pixel_y <= y_count;
|
||||
strip_first_pixel <= strip_first_active_pixel;
|
||||
strip_last_pixel <= x_is_last && strip_row_is_last;
|
||||
image_first_pixel <= 1'b0;
|
||||
image_last_pixel <= x_is_last && y_is_last;
|
||||
|
||||
if (x_is_last) begin
|
||||
x_count <= 13'd0;
|
||||
if (y_is_last) begin
|
||||
y_count <= 13'd0;
|
||||
strip_row_count <= 13'd0;
|
||||
image_active <= 1'b0;
|
||||
end else begin
|
||||
y_count <= y_count + 13'd1;
|
||||
if (strip_row_is_last) begin
|
||||
strip_row_count <= 13'd0;
|
||||
end else begin
|
||||
strip_row_count <= strip_row_count + 13'd1;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
x_count <= x_count + 13'd1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
`default_nettype wire
|
||||
876
fpga/verilog/jls_mode_router.sv
Normal file
876
fpga/verilog/jls_mode_router.sv
Normal file
@@ -0,0 +1,876 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.3 context determination, Annex A.7 run mode
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Select regular mode or run mode from local gradients
|
||||
// Trace : docs/jls_traceability.md#run-mode
|
||||
// Example : When D1=D2=D3=0 and X=Ra, a run pixel is reconstructed as Ra
|
||||
// and the accumulated run_length is not entropy coded until the
|
||||
// run reaches EOL or an interruption sample.
|
||||
//
|
||||
// First-pass mode router and run scanner. It consumes neighbor events from
|
||||
// jls_neighbor_provider, sends non-run contexts to the regular pipeline, and
|
||||
// accumulates run pixels for jls_run_mode segment encoding. To preserve
|
||||
// entropy order in the later top-level integration, this module stalls regular,
|
||||
// interruption, and EOL segment emission behind an outstanding run segment. It
|
||||
// may still accept later non-EOL matching run pixels because those pixels only
|
||||
// update run_length_accum and reconstructed history; they do not emit entropy
|
||||
// until a later segment boundary.
|
||||
// Once run_length_accum is non-zero, the scanner remains in the standard
|
||||
// Annex A.7 run loop; the next pixel is judged against RUNval/Ra and EOL, not
|
||||
// reclassified by the regular-mode gradient context.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_mode_router #(
|
||||
// Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
|
||||
parameter int PIX_WIDTH = 16
|
||||
) (
|
||||
// Main 250 MHz clock.
|
||||
input var logic clk,
|
||||
|
||||
// Synchronous active-high reset.
|
||||
input var logic rst,
|
||||
|
||||
// Neighbor event from jls_neighbor_provider.
|
||||
input var logic pixel_valid,
|
||||
output logic pixel_ready,
|
||||
input var logic [PIX_WIDTH-1:0] pixel_sample,
|
||||
input var logic [12:0] pixel_x,
|
||||
input var logic [12:0] pixel_y,
|
||||
input var logic pixel_strip_first_pixel,
|
||||
input var logic pixel_strip_last_pixel,
|
||||
input var logic [PIX_WIDTH-1:0] Ra,
|
||||
input var logic [PIX_WIDTH-1:0] Rb,
|
||||
input var logic [PIX_WIDTH-1:0] Rc,
|
||||
input var logic [PIX_WIDTH-1:0] Rd,
|
||||
|
||||
// Active strip width and NEAR for mode/run decisions.
|
||||
input var logic [12:0] strip_width,
|
||||
input var logic [5:0] NEAR,
|
||||
|
||||
// Regular-mode event forwarded to jls_predictor.
|
||||
output logic regular_valid,
|
||||
input var logic regular_ready,
|
||||
output logic [PIX_WIDTH-1:0] regular_sample,
|
||||
output logic [12:0] regular_x,
|
||||
output logic [12:0] regular_y,
|
||||
output logic regular_strip_first_pixel,
|
||||
output logic regular_strip_last_pixel,
|
||||
output logic [PIX_WIDTH-1:0] regular_Ra,
|
||||
output logic [PIX_WIDTH-1:0] regular_Rb,
|
||||
output logic [PIX_WIDTH-1:0] regular_Rc,
|
||||
output logic [PIX_WIDTH-1:0] regular_Rd,
|
||||
|
||||
// Run segment event for jls_run_mode.
|
||||
output logic run_segment_valid,
|
||||
input var logic run_segment_ready,
|
||||
output logic [12:0] run_length,
|
||||
output logic run_end_of_line,
|
||||
output logic run_interruption_valid,
|
||||
output logic [PIX_WIDTH-1:0] run_interruption_sample,
|
||||
output logic [12:0] run_interruption_x,
|
||||
output logic [12:0] run_interruption_y,
|
||||
output logic run_interruption_strip_first_pixel,
|
||||
output logic run_interruption_strip_last_pixel,
|
||||
output logic [PIX_WIDTH-1:0] run_Ra,
|
||||
output logic [PIX_WIDTH-1:0] run_Rb,
|
||||
|
||||
// jls_run_mode segment completion. The router does not accept the next pixel
|
||||
// until the segment's entropy events and optional interruption reconstruction
|
||||
// have completed.
|
||||
input var logic run_segment_done,
|
||||
|
||||
// Direct reconstructed run pixel. Run-interruption reconstruction comes from
|
||||
// jls_run_mode and is muxed at top level with this port and the regular path.
|
||||
output logic run_recon_valid,
|
||||
input var logic run_recon_ready,
|
||||
output logic [PIX_WIDTH-1:0] run_recon_sample,
|
||||
output logic [12:0] run_recon_x,
|
||||
output logic [12:0] run_recon_y
|
||||
);
|
||||
|
||||
typedef enum logic [1:0] {
|
||||
STATE_IDLE = 2'd0,
|
||||
STATE_WAIT_SEG = 2'd1
|
||||
} router_state_e;
|
||||
|
||||
typedef enum logic [1:0] {
|
||||
EVENT_REGULAR = 2'd0,
|
||||
EVENT_RUN_PIXEL = 2'd1,
|
||||
EVENT_RUN_EOF_LINE = 2'd2,
|
||||
EVENT_INTERRUPT = 2'd3
|
||||
} event_kind_e;
|
||||
|
||||
router_state_e state;
|
||||
event_kind_e event_kind;
|
||||
event_kind_e event_kind_next;
|
||||
|
||||
// Accumulated run length since the current run-mode segment started.
|
||||
logic [12:0] run_length_accum;
|
||||
|
||||
// Two-entry elastic input stage. This breaks the long combinational path
|
||||
// from neighbor history values through Annex A.3/A.7 decisions into the
|
||||
// downstream entropy ready chain. The second slot lets pixel_ready depend
|
||||
// only on local queue fullness instead of same-cycle downstream event_accept.
|
||||
logic slot_valid;
|
||||
logic [PIX_WIDTH-1:0] slot_sample;
|
||||
logic [12:0] slot_x;
|
||||
logic [12:0] slot_y;
|
||||
logic slot_strip_first_pixel;
|
||||
logic slot_strip_last_pixel;
|
||||
logic [PIX_WIDTH-1:0] slot_Ra;
|
||||
logic [PIX_WIDTH-1:0] slot_Rb;
|
||||
logic [PIX_WIDTH-1:0] slot_Rc;
|
||||
logic [PIX_WIDTH-1:0] slot_Rd;
|
||||
logic signed [32:0] slot_D1;
|
||||
logic signed [32:0] slot_D2;
|
||||
logic signed [32:0] slot_D3;
|
||||
logic signed [32:0] slot_sample_minus_Ra;
|
||||
logic slot_pixel_is_eol;
|
||||
logic next_slot_valid;
|
||||
logic [PIX_WIDTH-1:0] next_slot_sample;
|
||||
logic [12:0] next_slot_x;
|
||||
logic [12:0] next_slot_y;
|
||||
logic next_slot_strip_first_pixel;
|
||||
logic next_slot_strip_last_pixel;
|
||||
logic [PIX_WIDTH-1:0] next_slot_Ra;
|
||||
logic [PIX_WIDTH-1:0] next_slot_Rb;
|
||||
logic [PIX_WIDTH-1:0] next_slot_Rc;
|
||||
logic [PIX_WIDTH-1:0] next_slot_Rd;
|
||||
logic signed [32:0] next_slot_D1;
|
||||
logic signed [32:0] next_slot_D2;
|
||||
logic signed [32:0] next_slot_D3;
|
||||
logic signed [32:0] next_slot_sample_minus_Ra;
|
||||
logic next_slot_pixel_is_eol;
|
||||
|
||||
// One-entry classified slot. Annex A.3/A.7 gradient and run decisions are
|
||||
// registered here before the event queue sees them; this avoids driving the
|
||||
// event register enables directly from Ra/Rb/Rc/Rd comparison logic.
|
||||
logic class_valid;
|
||||
logic class_slot_open;
|
||||
logic class_can_enqueue;
|
||||
logic class_to_event;
|
||||
logic [PIX_WIDTH-1:0] class_sample;
|
||||
logic [12:0] class_x;
|
||||
logic [12:0] class_y;
|
||||
logic class_strip_first_pixel;
|
||||
logic class_strip_last_pixel;
|
||||
logic [PIX_WIDTH-1:0] class_Ra;
|
||||
logic [PIX_WIDTH-1:0] class_Rb;
|
||||
logic [PIX_WIDTH-1:0] class_Rc;
|
||||
logic [PIX_WIDTH-1:0] class_Rd;
|
||||
event_kind_e class_kind;
|
||||
logic [12:0] class_run_length;
|
||||
logic class_run_end_of_line;
|
||||
logic class_run_interruption_valid;
|
||||
|
||||
// Registered classified event. This is the timing boundary between Annex
|
||||
// A.3/A.7 mode decision and the downstream regular/run entropy pipelines.
|
||||
logic event_valid;
|
||||
logic [PIX_WIDTH-1:0] event_sample;
|
||||
logic [12:0] event_x;
|
||||
logic [12:0] event_y;
|
||||
logic event_strip_first_pixel;
|
||||
logic event_strip_last_pixel;
|
||||
logic [PIX_WIDTH-1:0] event_Ra;
|
||||
logic [PIX_WIDTH-1:0] event_Rb;
|
||||
logic [PIX_WIDTH-1:0] event_Rc;
|
||||
logic [PIX_WIDTH-1:0] event_Rd;
|
||||
logic [12:0] event_run_length;
|
||||
logic event_run_end_of_line;
|
||||
logic event_run_interruption_valid;
|
||||
logic [12:0] event_run_length_next;
|
||||
logic event_run_end_of_line_next;
|
||||
logic event_run_interruption_valid_next;
|
||||
// Second event slot. A filled second slot lets mode classification advance
|
||||
// without using the downstream ready chain in the same cycle.
|
||||
logic event_next_valid;
|
||||
logic [PIX_WIDTH-1:0] event_next_sample;
|
||||
logic [12:0] event_next_x;
|
||||
logic [12:0] event_next_y;
|
||||
logic event_next_strip_first_pixel;
|
||||
logic event_next_strip_last_pixel;
|
||||
logic [PIX_WIDTH-1:0] event_next_Ra;
|
||||
logic [PIX_WIDTH-1:0] event_next_Rb;
|
||||
logic [PIX_WIDTH-1:0] event_next_Rc;
|
||||
logic [PIX_WIDTH-1:0] event_next_Rd;
|
||||
event_kind_e event_next_kind;
|
||||
logic [12:0] event_next_run_length;
|
||||
logic event_next_run_end_of_line;
|
||||
logic event_next_run_interruption_valid;
|
||||
|
||||
// Current input-pixel arithmetic. Only the first subtract layer is computed
|
||||
// before the slot register; the absolute-value / compare tree for Annex
|
||||
// A.3/A.7 runs one cycle later from slot_D1/slot_D2/slot_D3.
|
||||
logic signed [32:0] pixel_Ra_ext;
|
||||
logic signed [32:0] pixel_Rb_ext;
|
||||
logic signed [32:0] pixel_Rc_ext;
|
||||
logic signed [32:0] pixel_Rd_ext;
|
||||
logic signed [32:0] pixel_sample_ext;
|
||||
logic signed [32:0] near_ext33;
|
||||
logic signed [32:0] pixel_D1;
|
||||
logic signed [32:0] pixel_D2;
|
||||
logic signed [32:0] pixel_D3;
|
||||
logic signed [32:0] pixel_sample_minus_Ra;
|
||||
logic signed [32:0] slot_abs_D1;
|
||||
logic signed [32:0] slot_abs_D2;
|
||||
logic signed [32:0] slot_abs_D3;
|
||||
logic signed [32:0] slot_abs_sample_minus_Ra;
|
||||
logic slot_run_context_eval;
|
||||
logic slot_sample_matches_Ra_eval;
|
||||
logic pixel_is_eol_next;
|
||||
logic run_mode_active;
|
||||
logic slot_to_class;
|
||||
logic event_accept;
|
||||
logic slot_accept;
|
||||
logic load_pixel;
|
||||
logic input_queue_full;
|
||||
logic run_pixel_accept;
|
||||
logic interruption_accept;
|
||||
logic regular_accept;
|
||||
logic run_eol_segment_accept;
|
||||
logic [12:0] run_length_with_current;
|
||||
logic event_queue_full;
|
||||
logic promote_next_event;
|
||||
logic load_event_front_from_class;
|
||||
logic load_event_next_from_class;
|
||||
|
||||
// Padding for supported PIX_WIDTH values into 33-bit signed arithmetic.
|
||||
localparam int SAMPLE_EXT_PAD_WIDTH = 33 - PIX_WIDTH;
|
||||
|
||||
always_comb begin
|
||||
pixel_Ra_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Ra});
|
||||
pixel_Rb_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Rb});
|
||||
pixel_Rc_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Rc});
|
||||
pixel_Rd_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Rd});
|
||||
pixel_sample_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, pixel_sample});
|
||||
near_ext33 = $signed({27'd0, NEAR});
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
pixel_D1 = pixel_Rd_ext - pixel_Rb_ext;
|
||||
pixel_D2 = pixel_Rb_ext - pixel_Rc_ext;
|
||||
pixel_D3 = pixel_Rc_ext - pixel_Ra_ext;
|
||||
pixel_sample_minus_Ra = pixel_sample_ext - pixel_Ra_ext;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_abs_D1 = slot_D1;
|
||||
if (slot_D1 < 33'sd0) begin
|
||||
slot_abs_D1 = -slot_D1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_abs_D2 = slot_D2;
|
||||
if (slot_D2 < 33'sd0) begin
|
||||
slot_abs_D2 = -slot_D2;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_abs_D3 = slot_D3;
|
||||
if (slot_D3 < 33'sd0) begin
|
||||
slot_abs_D3 = -slot_D3;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_abs_sample_minus_Ra = slot_sample_minus_Ra;
|
||||
if (slot_sample_minus_Ra < 33'sd0) begin
|
||||
slot_abs_sample_minus_Ra = -slot_sample_minus_Ra;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_run_context_eval = 1'b0;
|
||||
if (slot_abs_D1 <= near_ext33 &&
|
||||
slot_abs_D2 <= near_ext33 &&
|
||||
slot_abs_D3 <= near_ext33) begin
|
||||
slot_run_context_eval = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_sample_matches_Ra_eval = 1'b0;
|
||||
if (slot_abs_sample_minus_Ra <= near_ext33) begin
|
||||
slot_sample_matches_Ra_eval = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
pixel_is_eol_next = 1'b0;
|
||||
if (pixel_x == (strip_width - 13'd1)) begin
|
||||
pixel_is_eol_next = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_mode_active = slot_run_context_eval;
|
||||
if (run_length_accum != 13'd0) begin
|
||||
run_mode_active = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_length_with_current = run_length_accum + 13'd1;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
regular_valid = 1'b0;
|
||||
if (event_valid && event_kind == EVENT_REGULAR) begin
|
||||
regular_valid = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_recon_valid = 1'b0;
|
||||
if (event_valid &&
|
||||
(event_kind == EVENT_RUN_PIXEL || event_kind == EVENT_RUN_EOF_LINE)) begin
|
||||
run_recon_valid = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_segment_valid = 1'b0;
|
||||
if (event_valid &&
|
||||
(event_kind == EVENT_RUN_EOF_LINE || event_kind == EVENT_INTERRUPT)) begin
|
||||
run_segment_valid = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
regular_accept = 1'b0;
|
||||
if (regular_valid && regular_ready) begin
|
||||
regular_accept = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_pixel_accept = 1'b0;
|
||||
if (run_recon_valid && run_recon_ready && event_kind == EVENT_RUN_PIXEL) begin
|
||||
run_pixel_accept = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_eol_segment_accept = 1'b0;
|
||||
if (run_recon_valid && run_recon_ready && run_segment_valid && run_segment_ready &&
|
||||
event_kind == EVENT_RUN_EOF_LINE) begin
|
||||
run_eol_segment_accept = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
interruption_accept = 1'b0;
|
||||
if (run_segment_valid && run_segment_ready && event_kind == EVENT_INTERRUPT) begin
|
||||
interruption_accept = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
event_accept = 1'b0;
|
||||
case (1'b1)
|
||||
(regular_accept): begin
|
||||
event_accept = 1'b1;
|
||||
end
|
||||
|
||||
(run_pixel_accept): begin
|
||||
event_accept = 1'b1;
|
||||
end
|
||||
|
||||
(run_eol_segment_accept): begin
|
||||
event_accept = 1'b1;
|
||||
end
|
||||
|
||||
(interruption_accept): begin
|
||||
event_accept = 1'b1;
|
||||
end
|
||||
|
||||
default: begin
|
||||
event_accept = 1'b0;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_accept = slot_to_class;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
event_kind_next = EVENT_REGULAR;
|
||||
if (run_mode_active) begin
|
||||
if (slot_sample_matches_Ra_eval && slot_pixel_is_eol) begin
|
||||
event_kind_next = EVENT_RUN_EOF_LINE;
|
||||
end else if (slot_sample_matches_Ra_eval) begin
|
||||
event_kind_next = EVENT_RUN_PIXEL;
|
||||
end else begin
|
||||
event_kind_next = EVENT_INTERRUPT;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
event_run_length_next = run_length_accum;
|
||||
event_run_end_of_line_next = 1'b0;
|
||||
event_run_interruption_valid_next = 1'b1;
|
||||
if (event_kind_next == EVENT_RUN_EOF_LINE) begin
|
||||
event_run_length_next = run_length_with_current;
|
||||
event_run_end_of_line_next = 1'b1;
|
||||
event_run_interruption_valid_next = 1'b0;
|
||||
end else if (event_kind_next == EVENT_RUN_PIXEL) begin
|
||||
event_run_length_next = run_length_with_current;
|
||||
event_run_end_of_line_next = 1'b0;
|
||||
event_run_interruption_valid_next = 1'b0;
|
||||
end else if (event_kind_next == EVENT_REGULAR) begin
|
||||
event_run_interruption_valid_next = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
input_queue_full = 1'b0;
|
||||
if (slot_valid && next_slot_valid) begin
|
||||
input_queue_full = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
event_queue_full = 1'b0;
|
||||
if (event_valid && event_next_valid) begin
|
||||
event_queue_full = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
class_slot_open = 1'b0;
|
||||
if (!class_valid || (class_to_event && class_kind == EVENT_REGULAR)) begin
|
||||
class_slot_open = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_to_class = 1'b0;
|
||||
if (slot_valid && class_slot_open) begin
|
||||
slot_to_class = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
class_can_enqueue = 1'b0;
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
class_can_enqueue = 1'b1;
|
||||
end
|
||||
|
||||
STATE_WAIT_SEG: begin
|
||||
if (class_kind == EVENT_RUN_PIXEL) begin
|
||||
class_can_enqueue = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
default: begin
|
||||
class_can_enqueue = 1'b0;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
class_to_event = 1'b0;
|
||||
if (class_valid && class_can_enqueue && !event_queue_full) begin
|
||||
class_to_event = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
promote_next_event = 1'b0;
|
||||
if (!event_valid && event_next_valid) begin
|
||||
promote_next_event = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
load_event_front_from_class = 1'b0;
|
||||
if (class_to_event && !event_valid && !event_next_valid) begin
|
||||
load_event_front_from_class = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
load_event_next_from_class = 1'b0;
|
||||
if (class_to_event && (event_valid || event_next_valid)) begin
|
||||
load_event_next_from_class = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
pixel_ready = 1'b0;
|
||||
if (!input_queue_full) begin
|
||||
pixel_ready = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
load_pixel = 1'b0;
|
||||
if (pixel_valid && pixel_ready) begin
|
||||
load_pixel = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
regular_sample = event_sample;
|
||||
regular_x = event_x;
|
||||
regular_y = event_y;
|
||||
regular_strip_first_pixel = event_strip_first_pixel;
|
||||
regular_strip_last_pixel = event_strip_last_pixel;
|
||||
regular_Ra = event_Ra;
|
||||
regular_Rb = event_Rb;
|
||||
regular_Rc = event_Rc;
|
||||
regular_Rd = event_Rd;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_length = event_run_length;
|
||||
run_end_of_line = event_run_end_of_line;
|
||||
run_interruption_valid = event_run_interruption_valid;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_interruption_sample = event_sample;
|
||||
run_interruption_x = event_x;
|
||||
run_interruption_y = event_y;
|
||||
run_interruption_strip_first_pixel = event_strip_first_pixel;
|
||||
run_interruption_strip_last_pixel = event_strip_last_pixel;
|
||||
run_Ra = event_Ra;
|
||||
run_Rb = event_Rb;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_recon_sample = event_Ra;
|
||||
run_recon_x = event_x;
|
||||
run_recon_y = event_y;
|
||||
end
|
||||
|
||||
/*
|
||||
* The event register above owns all external regular/run outputs. The older
|
||||
* direct slot-to-output combinational blocks are intentionally absent; this
|
||||
* keeps slot_Ra/Rb/Rc/Rd comparison logic from feeding top-level entropy
|
||||
* counters or run-mode DSP inputs in the same cycle.
|
||||
*/
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst) begin
|
||||
state <= STATE_IDLE;
|
||||
run_length_accum <= 13'd0;
|
||||
slot_valid <= 1'b0;
|
||||
slot_sample <= {PIX_WIDTH{1'b0}};
|
||||
slot_x <= 13'd0;
|
||||
slot_y <= 13'd0;
|
||||
slot_strip_first_pixel <= 1'b0;
|
||||
slot_strip_last_pixel <= 1'b0;
|
||||
slot_Ra <= {PIX_WIDTH{1'b0}};
|
||||
slot_Rb <= {PIX_WIDTH{1'b0}};
|
||||
slot_Rc <= {PIX_WIDTH{1'b0}};
|
||||
slot_Rd <= {PIX_WIDTH{1'b0}};
|
||||
slot_D1 <= 33'sd0;
|
||||
slot_D2 <= 33'sd0;
|
||||
slot_D3 <= 33'sd0;
|
||||
slot_sample_minus_Ra <= 33'sd0;
|
||||
slot_pixel_is_eol <= 1'b0;
|
||||
next_slot_valid <= 1'b0;
|
||||
next_slot_sample <= {PIX_WIDTH{1'b0}};
|
||||
next_slot_x <= 13'd0;
|
||||
next_slot_y <= 13'd0;
|
||||
next_slot_strip_first_pixel <= 1'b0;
|
||||
next_slot_strip_last_pixel <= 1'b0;
|
||||
next_slot_Ra <= {PIX_WIDTH{1'b0}};
|
||||
next_slot_Rb <= {PIX_WIDTH{1'b0}};
|
||||
next_slot_Rc <= {PIX_WIDTH{1'b0}};
|
||||
next_slot_Rd <= {PIX_WIDTH{1'b0}};
|
||||
next_slot_D1 <= 33'sd0;
|
||||
next_slot_D2 <= 33'sd0;
|
||||
next_slot_D3 <= 33'sd0;
|
||||
next_slot_sample_minus_Ra <= 33'sd0;
|
||||
next_slot_pixel_is_eol <= 1'b0;
|
||||
class_valid <= 1'b0;
|
||||
class_sample <= {PIX_WIDTH{1'b0}};
|
||||
class_x <= 13'd0;
|
||||
class_y <= 13'd0;
|
||||
class_strip_first_pixel <= 1'b0;
|
||||
class_strip_last_pixel <= 1'b0;
|
||||
class_Ra <= {PIX_WIDTH{1'b0}};
|
||||
class_Rb <= {PIX_WIDTH{1'b0}};
|
||||
class_Rc <= {PIX_WIDTH{1'b0}};
|
||||
class_Rd <= {PIX_WIDTH{1'b0}};
|
||||
class_kind <= EVENT_REGULAR;
|
||||
class_run_length <= 13'd0;
|
||||
class_run_end_of_line <= 1'b0;
|
||||
class_run_interruption_valid <= 1'b0;
|
||||
event_kind <= EVENT_REGULAR;
|
||||
event_valid <= 1'b0;
|
||||
event_sample <= {PIX_WIDTH{1'b0}};
|
||||
event_x <= 13'd0;
|
||||
event_y <= 13'd0;
|
||||
event_strip_first_pixel <= 1'b0;
|
||||
event_strip_last_pixel <= 1'b0;
|
||||
event_Ra <= {PIX_WIDTH{1'b0}};
|
||||
event_Rb <= {PIX_WIDTH{1'b0}};
|
||||
event_Rc <= {PIX_WIDTH{1'b0}};
|
||||
event_Rd <= {PIX_WIDTH{1'b0}};
|
||||
event_run_length <= 13'd0;
|
||||
event_run_end_of_line <= 1'b0;
|
||||
event_run_interruption_valid <= 1'b0;
|
||||
event_next_valid <= 1'b0;
|
||||
event_next_sample <= {PIX_WIDTH{1'b0}};
|
||||
event_next_x <= 13'd0;
|
||||
event_next_y <= 13'd0;
|
||||
event_next_strip_first_pixel <= 1'b0;
|
||||
event_next_strip_last_pixel <= 1'b0;
|
||||
event_next_Ra <= {PIX_WIDTH{1'b0}};
|
||||
event_next_Rb <= {PIX_WIDTH{1'b0}};
|
||||
event_next_Rc <= {PIX_WIDTH{1'b0}};
|
||||
event_next_Rd <= {PIX_WIDTH{1'b0}};
|
||||
event_next_kind <= EVENT_REGULAR;
|
||||
event_next_run_length <= 13'd0;
|
||||
event_next_run_end_of_line <= 1'b0;
|
||||
event_next_run_interruption_valid <= 1'b0;
|
||||
end else begin
|
||||
if (state == STATE_WAIT_SEG && run_segment_done) begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
|
||||
if (event_accept) begin
|
||||
event_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (class_to_event) begin
|
||||
class_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (slot_to_class) begin
|
||||
class_valid <= 1'b1;
|
||||
class_sample <= slot_sample;
|
||||
class_x <= slot_x;
|
||||
class_y <= slot_y;
|
||||
class_strip_first_pixel <= slot_strip_first_pixel;
|
||||
class_strip_last_pixel <= slot_strip_last_pixel;
|
||||
class_Ra <= slot_Ra;
|
||||
class_Rb <= slot_Rb;
|
||||
class_Rc <= slot_Rc;
|
||||
class_Rd <= slot_Rd;
|
||||
class_kind <= event_kind_next;
|
||||
class_run_length <= event_run_length_next;
|
||||
class_run_end_of_line <= event_run_end_of_line_next;
|
||||
class_run_interruption_valid <= event_run_interruption_valid_next;
|
||||
end
|
||||
|
||||
if (promote_next_event) begin
|
||||
event_valid <= 1'b1;
|
||||
event_kind <= event_next_kind;
|
||||
event_sample <= event_next_sample;
|
||||
event_x <= event_next_x;
|
||||
event_y <= event_next_y;
|
||||
event_strip_first_pixel <= event_next_strip_first_pixel;
|
||||
event_strip_last_pixel <= event_next_strip_last_pixel;
|
||||
event_Ra <= event_next_Ra;
|
||||
event_Rb <= event_next_Rb;
|
||||
event_Rc <= event_next_Rc;
|
||||
event_Rd <= event_next_Rd;
|
||||
event_run_length <= event_next_run_length;
|
||||
event_run_end_of_line <= event_next_run_end_of_line;
|
||||
event_run_interruption_valid <= event_next_run_interruption_valid;
|
||||
event_next_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (load_event_front_from_class) begin
|
||||
event_valid <= 1'b1;
|
||||
event_kind <= class_kind;
|
||||
event_sample <= class_sample;
|
||||
event_x <= class_x;
|
||||
event_y <= class_y;
|
||||
event_strip_first_pixel <= class_strip_first_pixel;
|
||||
event_strip_last_pixel <= class_strip_last_pixel;
|
||||
event_Ra <= class_Ra;
|
||||
event_Rb <= class_Rb;
|
||||
event_Rc <= class_Rc;
|
||||
event_Rd <= class_Rd;
|
||||
event_run_length <= class_run_length;
|
||||
event_run_end_of_line <= class_run_end_of_line;
|
||||
event_run_interruption_valid <= class_run_interruption_valid;
|
||||
end
|
||||
|
||||
if (load_event_next_from_class) begin
|
||||
event_next_valid <= 1'b1;
|
||||
event_next_kind <= class_kind;
|
||||
event_next_sample <= class_sample;
|
||||
event_next_x <= class_x;
|
||||
event_next_y <= class_y;
|
||||
event_next_strip_first_pixel <= class_strip_first_pixel;
|
||||
event_next_strip_last_pixel <= class_strip_last_pixel;
|
||||
event_next_Ra <= class_Ra;
|
||||
event_next_Rb <= class_Rb;
|
||||
event_next_Rc <= class_Rc;
|
||||
event_next_Rd <= class_Rd;
|
||||
event_next_run_length <= class_run_length;
|
||||
event_next_run_end_of_line <= class_run_end_of_line;
|
||||
event_next_run_interruption_valid <= class_run_interruption_valid;
|
||||
end
|
||||
|
||||
if (class_to_event) begin
|
||||
case (class_kind)
|
||||
EVENT_RUN_PIXEL: begin
|
||||
run_length_accum <= class_run_length;
|
||||
end
|
||||
|
||||
EVENT_RUN_EOF_LINE, EVENT_INTERRUPT: begin
|
||||
run_length_accum <= 13'd0;
|
||||
state <= STATE_WAIT_SEG;
|
||||
end
|
||||
|
||||
default: begin
|
||||
run_length_accum <= run_length_accum;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
case ({slot_to_class, slot_valid, next_slot_valid, load_pixel})
|
||||
4'b1100: begin
|
||||
slot_valid <= 1'b0;
|
||||
end
|
||||
|
||||
4'b1101: begin
|
||||
slot_valid <= 1'b1;
|
||||
slot_sample <= pixel_sample;
|
||||
slot_x <= pixel_x;
|
||||
slot_y <= pixel_y;
|
||||
slot_strip_first_pixel <= pixel_strip_first_pixel;
|
||||
slot_strip_last_pixel <= pixel_strip_last_pixel;
|
||||
slot_Ra <= Ra;
|
||||
slot_Rb <= Rb;
|
||||
slot_Rc <= Rc;
|
||||
slot_Rd <= Rd;
|
||||
slot_D1 <= pixel_D1;
|
||||
slot_D2 <= pixel_D2;
|
||||
slot_D3 <= pixel_D3;
|
||||
slot_sample_minus_Ra <= pixel_sample_minus_Ra;
|
||||
slot_pixel_is_eol <= pixel_is_eol_next;
|
||||
end
|
||||
|
||||
4'b1110: begin
|
||||
slot_valid <= 1'b1;
|
||||
slot_sample <= next_slot_sample;
|
||||
slot_x <= next_slot_x;
|
||||
slot_y <= next_slot_y;
|
||||
slot_strip_first_pixel <= next_slot_strip_first_pixel;
|
||||
slot_strip_last_pixel <= next_slot_strip_last_pixel;
|
||||
slot_Ra <= next_slot_Ra;
|
||||
slot_Rb <= next_slot_Rb;
|
||||
slot_Rc <= next_slot_Rc;
|
||||
slot_Rd <= next_slot_Rd;
|
||||
slot_D1 <= next_slot_D1;
|
||||
slot_D2 <= next_slot_D2;
|
||||
slot_D3 <= next_slot_D3;
|
||||
slot_sample_minus_Ra <= next_slot_sample_minus_Ra;
|
||||
slot_pixel_is_eol <= next_slot_pixel_is_eol;
|
||||
next_slot_valid <= 1'b0;
|
||||
end
|
||||
|
||||
4'b1111: begin
|
||||
slot_valid <= 1'b1;
|
||||
slot_sample <= next_slot_sample;
|
||||
slot_x <= next_slot_x;
|
||||
slot_y <= next_slot_y;
|
||||
slot_strip_first_pixel <= next_slot_strip_first_pixel;
|
||||
slot_strip_last_pixel <= next_slot_strip_last_pixel;
|
||||
slot_Ra <= next_slot_Ra;
|
||||
slot_Rb <= next_slot_Rb;
|
||||
slot_Rc <= next_slot_Rc;
|
||||
slot_Rd <= next_slot_Rd;
|
||||
slot_D1 <= next_slot_D1;
|
||||
slot_D2 <= next_slot_D2;
|
||||
slot_D3 <= next_slot_D3;
|
||||
slot_sample_minus_Ra <= next_slot_sample_minus_Ra;
|
||||
slot_pixel_is_eol <= next_slot_pixel_is_eol;
|
||||
next_slot_valid <= 1'b1;
|
||||
next_slot_sample <= pixel_sample;
|
||||
next_slot_x <= pixel_x;
|
||||
next_slot_y <= pixel_y;
|
||||
next_slot_strip_first_pixel <= pixel_strip_first_pixel;
|
||||
next_slot_strip_last_pixel <= pixel_strip_last_pixel;
|
||||
next_slot_Ra <= Ra;
|
||||
next_slot_Rb <= Rb;
|
||||
next_slot_Rc <= Rc;
|
||||
next_slot_Rd <= Rd;
|
||||
next_slot_D1 <= pixel_D1;
|
||||
next_slot_D2 <= pixel_D2;
|
||||
next_slot_D3 <= pixel_D3;
|
||||
next_slot_sample_minus_Ra <= pixel_sample_minus_Ra;
|
||||
next_slot_pixel_is_eol <= pixel_is_eol_next;
|
||||
end
|
||||
|
||||
4'b0001: begin
|
||||
slot_valid <= 1'b1;
|
||||
slot_sample <= pixel_sample;
|
||||
slot_x <= pixel_x;
|
||||
slot_y <= pixel_y;
|
||||
slot_strip_first_pixel <= pixel_strip_first_pixel;
|
||||
slot_strip_last_pixel <= pixel_strip_last_pixel;
|
||||
slot_Ra <= Ra;
|
||||
slot_Rb <= Rb;
|
||||
slot_Rc <= Rc;
|
||||
slot_Rd <= Rd;
|
||||
slot_D1 <= pixel_D1;
|
||||
slot_D2 <= pixel_D2;
|
||||
slot_D3 <= pixel_D3;
|
||||
slot_sample_minus_Ra <= pixel_sample_minus_Ra;
|
||||
slot_pixel_is_eol <= pixel_is_eol_next;
|
||||
end
|
||||
|
||||
4'b0101, 4'b0111: begin
|
||||
next_slot_valid <= 1'b1;
|
||||
next_slot_sample <= pixel_sample;
|
||||
next_slot_x <= pixel_x;
|
||||
next_slot_y <= pixel_y;
|
||||
next_slot_strip_first_pixel <= pixel_strip_first_pixel;
|
||||
next_slot_strip_last_pixel <= pixel_strip_last_pixel;
|
||||
next_slot_Ra <= Ra;
|
||||
next_slot_Rb <= Rb;
|
||||
next_slot_Rc <= Rc;
|
||||
next_slot_Rd <= Rd;
|
||||
next_slot_D1 <= pixel_D1;
|
||||
next_slot_D2 <= pixel_D2;
|
||||
next_slot_D3 <= pixel_D3;
|
||||
next_slot_sample_minus_Ra <= pixel_sample_minus_Ra;
|
||||
next_slot_pixel_is_eol <= pixel_is_eol_next;
|
||||
end
|
||||
|
||||
default: begin
|
||||
slot_valid <= slot_valid;
|
||||
next_slot_valid <= next_slot_valid;
|
||||
end
|
||||
endcase
|
||||
|
||||
if (!slot_valid && next_slot_valid && !slot_to_class && !load_pixel) begin
|
||||
slot_valid <= 1'b1;
|
||||
slot_sample <= next_slot_sample;
|
||||
slot_x <= next_slot_x;
|
||||
slot_y <= next_slot_y;
|
||||
slot_strip_first_pixel <= next_slot_strip_first_pixel;
|
||||
slot_strip_last_pixel <= next_slot_strip_last_pixel;
|
||||
slot_Ra <= next_slot_Ra;
|
||||
slot_Rb <= next_slot_Rb;
|
||||
slot_Rc <= next_slot_Rc;
|
||||
slot_Rd <= next_slot_Rd;
|
||||
slot_D1 <= next_slot_D1;
|
||||
slot_D2 <= next_slot_D2;
|
||||
slot_D3 <= next_slot_D3;
|
||||
slot_sample_minus_Ra <= next_slot_sample_minus_Ra;
|
||||
slot_pixel_is_eol <= next_slot_pixel_is_eol;
|
||||
next_slot_valid <= 1'b0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
`default_nettype wire
|
||||
264
fpga/verilog/jls_near_ctrl.sv
Normal file
264
fpga/verilog/jls_near_ctrl.sv
Normal file
@@ -0,0 +1,264 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.2.3 scan header NEAR parameter; Annex A uses NEAR in coding
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Project dynamic NEAR control around the standard NEAR parameter
|
||||
// Trace : docs/jls_traceability.md#dynamic-near-control
|
||||
// Example : For ratio=2, target bits are source bits divided by 4.
|
||||
//
|
||||
// Dynamic NEAR controller. This project-specific controller keeps NEAR at 0
|
||||
// for lossless/invalid ratios and applies a simple cumulative actual-vs-target
|
||||
// step after each standalone strip frame is fully output.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_near_ctrl #(
  // Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
  // Any other value is accounted as 16 bits per pixel.
  parameter int PIX_WIDTH = 16,

  // Largest NEAR value the controller will step up to.
  parameter int MAX_NEAR = 31
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous active-high reset.
  input var logic rst,

  // First strip of a new original image; clears NEAR and both accumulators.
  input var logic image_start_valid,

  // Runtime ratio, latched when image_start_valid is high.
  input var logic [3:0] image_ratio,

  // Current strip frame is completely output, including header, payload, and EOI.
  input var logic strip_done_valid,

  // Number of original-image pixels in the completed strip frame.
  input var logic [31:0] strip_pixel_count,

  // Number of output bytes generated by the completed strip frame.
  input var logic [31:0] strip_output_bytes,

  // NEAR value to use for the next strip frame header and coding pipeline.
  output logic [5:0] current_near,

  // Cumulative actual output bits for verification and reporting.
  output logic [47:0] actual_bits_cumulative,

  // Cumulative target bits for verification and reporting.
  output logic [47:0] target_bits_cumulative,

  // Sticky report flag: target still missed while NEAR was already at MAX_NEAR.
  output logic target_miss_at_max_near,

  // High for the one cycle in which a registered strip update is being
  // applied. The top level holds the next strip start while this is high so
  // the scan header observes the updated NEAR.
  output logic update_busy
);

  // Ratio encodings follow the SRS ratio port definition.
  localparam logic [3:0] RATIO_LOSSLESS = 4'd0;
  localparam logic [3:0] RATIO_1_TO_2   = 4'd1;
  localparam logic [3:0] RATIO_1_TO_4   = 4'd2;
  localparam logic [3:0] RATIO_1_TO_8   = 4'd3;

  // MAX_NEAR truncated to the width of current_near.
  localparam logic [5:0] MAX_NEAR_VALUE = MAX_NEAR[5:0];

  // Source bits charged per pixel. Each set bit of this constant contributes
  // one shifted copy of the pixel count below (8 -> x8, 10 -> x8 + x2, ...).
  localparam int SOURCE_BITS_PER_PIXEL =
      ((PIX_WIDTH == 8) || (PIX_WIDTH == 10) || (PIX_WIDTH == 12) || (PIX_WIDTH == 14))
          ? PIX_WIDTH : 16;

  // Ratio latched for the current original image.
  logic [3:0] active_ratio;

  // Per-strip bit counts, all widened to the 48-bit accumulator width.
  logic [47:0] pixel_count_wide;
  logic [47:0] strip_source_bits;
  logic [47:0] strip_target_bits;
  logic [47:0] strip_actual_bits;

  // Running totals including the strip that just completed.
  logic [47:0] actual_bits_sum;
  logic [47:0] target_bits_sum;

  // Stage register between the 48-bit adders and the compare/step logic.
  logic        pending_update_valid;
  logic [47:0] pending_actual_bits_sum;
  logic [47:0] pending_target_bits_sum;
  logic        pending_ratio_is_lossless_or_invalid;

  // Decisions evaluated on the registered sums.
  logic       ratio_is_lossless_or_invalid;
  logic       actual_over_target;
  logic       actual_under_target;
  logic       near_is_max;
  logic       miss_at_max_now;
  logic [5:0] near_after_update;

  assign update_busy = pending_update_valid;

  // Source size of the strip: pixel count times bits per pixel, built from
  // shifted copies so no multiplier is inferred.
  always_comb begin
    pixel_count_wide  = {16'd0, strip_pixel_count};
    strip_source_bits = 48'd0;

    if (SOURCE_BITS_PER_PIXEL[1]) begin
      strip_source_bits = strip_source_bits + (pixel_count_wide << 1);
    end
    if (SOURCE_BITS_PER_PIXEL[2]) begin
      strip_source_bits = strip_source_bits + (pixel_count_wide << 2);
    end
    if (SOURCE_BITS_PER_PIXEL[3]) begin
      strip_source_bits = strip_source_bits + (pixel_count_wide << 3);
    end
    if (SOURCE_BITS_PER_PIXEL[4]) begin
      strip_source_bits = strip_source_bits + (pixel_count_wide << 4);
    end
  end

  // Target size: source bits divided by 2, 4 or 8 for the lossy ratios and
  // left unchanged for the lossless or any unrecognised ratio code.
  always_comb begin
    if (active_ratio == RATIO_1_TO_2) begin
      strip_target_bits = strip_source_bits >> 1;
    end else if (active_ratio == RATIO_1_TO_4) begin
      strip_target_bits = strip_source_bits >> 2;
    end else if (active_ratio == RATIO_1_TO_8) begin
      strip_target_bits = strip_source_bits >> 3;
    end else begin
      strip_target_bits = strip_source_bits;
    end
  end

  // Actual size in bits (bytes * 8) and the two candidate running totals.
  always_comb begin
    strip_actual_bits = {13'd0, strip_output_bytes, 3'b000};
    actual_bits_sum   = actual_bits_cumulative + strip_actual_bits;
    target_bits_sum   = target_bits_cumulative + strip_target_bits;
  end

  // Only ratio codes 1..3 enable dynamic NEAR; everything else pins NEAR to 0.
  always_comb begin
    ratio_is_lossless_or_invalid =
        (active_ratio == RATIO_LOSSLESS) || (active_ratio > RATIO_1_TO_8);
  end

  // Compare the registered totals and work out the NEAR value to commit.
  always_comb begin
    actual_over_target  = (pending_actual_bits_sum > pending_target_bits_sum);
    actual_under_target = (pending_actual_bits_sum < pending_target_bits_sum);
    near_is_max         = (current_near >= MAX_NEAR_VALUE);

    miss_at_max_now = !pending_ratio_is_lossless_or_invalid &&
                      actual_over_target && near_is_max;

    near_after_update = current_near;
    if (pending_ratio_is_lossless_or_invalid) begin
      near_after_update = 6'd0;
    end else if (actual_over_target) begin
      // Over budget: coarsen by one step unless already at the ceiling.
      if (!near_is_max) begin
        near_after_update = current_near + 6'd1;
      end
    end else if (actual_under_target && (current_near != 6'd0)) begin
      // Under budget: refine by one step, never below lossless.
      near_after_update = current_near - 6'd1;
    end
  end

  // Priority: reset / new image, then commit of a pending update, then
  // capture of a newly completed strip.
  always_ff @(posedge clk) begin
    if (rst || image_start_valid) begin
      active_ratio                         <= rst ? RATIO_LOSSLESS : image_ratio;
      current_near                         <= 6'd0;
      actual_bits_cumulative               <= 48'd0;
      target_bits_cumulative               <= 48'd0;
      target_miss_at_max_near              <= 1'b0;
      pending_update_valid                 <= 1'b0;
      pending_actual_bits_sum              <= 48'd0;
      pending_target_bits_sum              <= 48'd0;
      pending_ratio_is_lossless_or_invalid <= 1'b1;
    end else if (pending_update_valid) begin
      actual_bits_cumulative <= pending_actual_bits_sum;
      target_bits_cumulative <= pending_target_bits_sum;
      current_near           <= near_after_update;

      if (miss_at_max_now) begin
        target_miss_at_max_near <= 1'b1;
      end

      pending_update_valid <= 1'b0;
    end else if (strip_done_valid) begin
      pending_update_valid                 <= 1'b1;
      pending_actual_bits_sum              <= actual_bits_sum;
      pending_target_bits_sum              <= target_bits_sum;
      pending_ratio_is_lossless_or_invalid <= ratio_is_lossless_or_invalid;
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
79
fpga/verilog/jls_near_scale_mul.sv
Normal file
79
fpga/verilog/jls_near_scale_mul.sv
Normal file
@@ -0,0 +1,79 @@
|
||||
// Standard : Helper for JPEG-LS Annex A.5/A.6/A.7 odd-scale products
|
||||
// Clause : N/A helper used by multiple Annex arithmetic stages
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : product = multiplicand * (2 * NEAR + 1)
|
||||
// Trace : docs/jls_traceability.md#regular-error-quantization
|
||||
// Example : multiplicand=5 and near_scale=5 gives 25.
|
||||
//
|
||||
// The JPEG-LS NEAR scale is always an odd 6-bit positive value in the range
|
||||
// 1..63. Vivado tended to map these narrow-scale multiplies into cascaded
|
||||
// DSP48E1 structures, which put PCOUT->PCIN on the top timing path. This
|
||||
// helper keeps the operation in carry chains with a fixed three-adder shape:
|
||||
// one partial sum for bits [2:0], one partial sum for bits [5:3], then a final
|
||||
// add. The caller provides the surrounding pipeline registers.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_near_scale_mul #(
  // Width of the signed multiplicand.
  parameter int INPUT_WIDTH = 33,

  // Width of the signed product; the multiplicand is sign-extended to it.
  parameter int OUTPUT_WIDTH = 41
) (
  // Signed value to be scaled.
  input var logic signed [INPUT_WIDTH-1:0] multiplicand_i,

  // Unsigned 6-bit scale factor.
  input var logic [5:0] near_scale_i,

  // multiplicand_i * near_scale_i, truncated to OUTPUT_WIDTH bits.
  output logic signed [OUTPUT_WIDTH-1:0] product_o
);

  // Number of sign bits prepended to reach the product width.
  localparam int SIGN_PAD_BITS = OUTPUT_WIDTH - INPUT_WIDTH;

  // Sign-extended multiplicand and its power-of-two multiples.
  logic signed [OUTPUT_WIDTH-1:0] times_1;
  logic signed [OUTPUT_WIDTH-1:0] times_2;
  logic signed [OUTPUT_WIDTH-1:0] times_4;
  logic signed [OUTPUT_WIDTH-1:0] times_8;
  logic signed [OUTPUT_WIDTH-1:0] times_16;
  logic signed [OUTPUT_WIDTH-1:0] times_32;

  // Contribution of scale bits [2:0] and of scale bits [5:3].
  logic signed [OUTPUT_WIDTH-1:0] low_sum;
  logic signed [OUTPUT_WIDTH-1:0] high_sum;

  always_comb begin
    times_1  = {{SIGN_PAD_BITS{multiplicand_i[INPUT_WIDTH-1]}}, multiplicand_i};
    times_2  = times_1 <<< 1;
    times_4  = times_1 <<< 2;
    times_8  = times_1 <<< 3;
    times_16 = times_1 <<< 4;
    times_32 = times_1 <<< 5;
  end

  // Each 3-bit field of the scale selects a fixed combination of shifted
  // operands, so the two partial sums are plain adders rather than a multiplier.
  always_comb begin
    case (near_scale_i[2:0])
      3'd0:    low_sum = {OUTPUT_WIDTH{1'b0}};
      3'd1:    low_sum = times_1;
      3'd2:    low_sum = times_2;
      3'd3:    low_sum = times_1 + times_2;
      3'd4:    low_sum = times_4;
      3'd5:    low_sum = times_1 + times_4;
      3'd6:    low_sum = times_2 + times_4;
      default: low_sum = times_1 + times_2 + times_4;
    endcase

    case (near_scale_i[5:3])
      3'd0:    high_sum = {OUTPUT_WIDTH{1'b0}};
      3'd1:    high_sum = times_8;
      3'd2:    high_sum = times_16;
      3'd3:    high_sum = times_8 + times_16;
      3'd4:    high_sum = times_32;
      3'd5:    high_sum = times_8 + times_32;
      3'd6:    high_sum = times_16 + times_32;
      default: high_sum = times_8 + times_16 + times_32;
    endcase
  end

  // Final add of the two partial sums.
  always_comb begin
    product_o = low_sum + high_sum;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
485
fpga/verilog/jls_neighbor_provider.sv
Normal file
485
fpga/verilog/jls_neighbor_provider.sv
Normal file
@@ -0,0 +1,485 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.3 context determination, Annex A.4 prediction
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Reconstructed neighborhood selection for Ra/Rb/Rc/Rd
|
||||
// Trace : docs/jls_traceability.md#med-predictor
|
||||
// Example : At x=0, Ra and Rb are the first sample from the previous line;
|
||||
// Rc is the previous line's left-edge extension sample, and Rd
|
||||
// is the next previous-line sample.
|
||||
//
|
||||
// Reconstructed-neighbor provider for one grayscale strip frame. JPEG-LS uses
|
||||
// encoder-side reconstructed samples as prediction history. For NEAR=0 the
|
||||
// reconstructed value is exactly the input sample, so this module commits the
|
||||
// sample to line history immediately and removes the feedback bubble. For
|
||||
// NEAR>0 it keeps one pixel outstanding until the true reconstructed sample
|
||||
// returns, preserving near-lossless standard state. The next pixel is accepted
|
||||
// after Rx is committed; this deliberate timing boundary keeps recon_x/recon_y
|
||||
// out of the upstream ready path at the 250 MHz target.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_neighbor_provider #(
  // Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16,

  // Maximum supported runtime image width; sets the depth of each line bank.
  parameter int MAX_PIC_COL = 6144
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous active-high reset.
  input var logic rst,

  // Pixel event from jls_scan_ctrl is valid.
  input var logic pixel_valid,

  // This provider can accept the current source pixel.
  output logic pixel_ready,

  // Original input sample X.
  input var logic [PIX_WIDTH-1:0] pixel_sample,

  // Original-image coordinate. A strip starts when strip_first_pixel is high.
  input var logic [12:0] pixel_x,
  input var logic [12:0] pixel_y,

  // High on the last column of the current row. Supplied pre-decoded so no
  // width comparison sits on the line-RAM read path for Rd.
  input var logic pixel_row_last,

  // Strip-local boundary flags.
  input var logic strip_first_pixel,
  input var logic strip_last_pixel,

  // Fast-commit mode: X is written into line history on acceptance instead of
  // waiting for the reconstructed sample. Intended for strips coded at NEAR==0.
  input var logic lossless_fast_mode,

  // Pixel/neighborhood event is valid.
  output logic neigh_valid,

  // Downstream predictor accepted the neighborhood event.
  input var logic neigh_ready,

  // Forwarded pixel fields.
  output logic [PIX_WIDTH-1:0] neigh_sample,
  output logic [12:0] neigh_x,
  output logic [12:0] neigh_y,
  output logic neigh_strip_first_pixel,
  output logic neigh_strip_last_pixel,

  // Standard reconstructed neighbors.
  output logic [PIX_WIDTH-1:0] Ra,
  output logic [PIX_WIDTH-1:0] Rb,
  output logic [PIX_WIDTH-1:0] Rc,
  output logic [PIX_WIDTH-1:0] Rd,

  // Reconstructed sample writeback from the later error-quantizer/run stage.
  input var logic recon_valid,

  // High while a reconstructed sample is awaited, and always high in
  // lossless fast mode.
  output logic recon_ready,

  // Reconstructed sample Rx and its coordinate.
  input var logic [PIX_WIDTH-1:0] recon_sample,
  input var logic [12:0] recon_x,
  input var logic [12:0] recon_y
);

  // Previous-row / current-row storage. read_bank selects the bank holding
  // the previous row; the current row is written to the other bank.
  logic [PIX_WIDTH-1:0] line_bank0 [0:MAX_PIC_COL-1];
  logic [PIX_WIDTH-1:0] line_bank1 [0:MAX_PIC_COL-1];

  logic read_bank;
  logic write_bank;

  // High while the current row is the first row of a strip frame.
  logic top_row_active;

  // Bookkeeping for the single pixel awaiting its reconstructed sample.
  logic        waiting_reconstruct;
  logic [12:0] outstanding_x;
  logic [12:0] outstanding_y;
  logic        outstanding_row_last;

  // Most recently committed sample of the current row (left neighbor).
  logic [PIX_WIDTH-1:0] left_Ra;
  logic [PIX_WIDTH-1:0] left_Ra_for_pixel;

  // Left-edge extension state: row_left_Rb captures Rb seen at x=0 of the
  // current row and is handed to left_edge_Rc at row end, where it serves as
  // Rc for x=0 of the following row.
  logic [PIX_WIDTH-1:0] left_edge_Rc;
  logic [PIX_WIDTH-1:0] row_left_Rb;
  logic [PIX_WIDTH-1:0] row_left_Rb_next;

  // Boundary decode and line-bank read addresses for the offered pixel.
  logic        pixel_x_is_left_edge;
  logic        pixel_x_is_right_edge;
  logic [12:0] rb_addr;
  logic [12:0] rc_addr;
  logic [12:0] rd_addr;
  logic        effective_top_row_active;

  // Previous-row samples read from the selected bank (zero on a top row).
  logic [PIX_WIDTH-1:0] prev_Rb;
  logic [PIX_WIDTH-1:0] prev_Rc;
  logic [PIX_WIDTH-1:0] prev_Rd;

  // Neighborhood registered together with an accepted pixel.
  logic [PIX_WIDTH-1:0] Ra_next;
  logic [PIX_WIDTH-1:0] Rb_next;
  logic [PIX_WIDTH-1:0] Rc_next;
  logic [PIX_WIDTH-1:0] Rd_next;

  // Commit-path controls and the shared line-bank write port.
  logic                 accept_pixel_needs_recon;
  logic                 accept_pixel_fast_commit;
  logic                 accept_write_bank;
  logic                 line_write_valid;
  logic                 line_write_bank;
  logic [12:0]          line_write_addr;
  logic [PIX_WIDTH-1:0] line_write_sample;

  // Handshake terms.
  logic neigh_slot_open;
  logic accept_pixel;
  logic accept_recon;
  logic accept_recon_write;

  // Diagnostic decode of the former same-row bypass condition. None of these
  // signals feed pixel_ready or any state register.
  logic        recon_is_row_last;
  logic [12:0] recon_x_plus_one;
  logic        recon_bypass_not_row_last;
  logic        recon_bypass_strip_ok;
  logic        recon_bypass_x_matches;
  logic        recon_bypass_y_matches;
  logic        same_row_recon_bypass_ready;

  // ---------------------------------------------------------------------
  // Position decode and read addressing.
  // ---------------------------------------------------------------------
  always_comb begin
    write_bank = ~read_bank;

    // The first pixel of a strip is on a top row even before the
    // top_row_active register has been updated for it.
    effective_top_row_active = top_row_active || strip_first_pixel;

    pixel_x_is_left_edge  = (pixel_x == 13'd0);
    pixel_x_is_right_edge = pixel_row_last;

    // A strip's first pixel forces read_bank to 0 in the same cycle, so its
    // write bank is bank 1 regardless of the old read_bank value.
    accept_write_bank = strip_first_pixel ? 1'b1 : write_bank;

    // Rc/Rd addresses are clamped to the pixel's own column at the edges.
    rb_addr = pixel_x;
    rc_addr = pixel_x_is_left_edge  ? pixel_x : (pixel_x - 13'd1);
    rd_addr = pixel_x_is_right_edge ? pixel_x : (pixel_x + 13'd1);
  end

  // ---------------------------------------------------------------------
  // Previous-row read. A top row has no previous row and reads as zero.
  // ---------------------------------------------------------------------
  always_comb begin
    prev_Rb = {PIX_WIDTH{1'b0}};
    prev_Rc = {PIX_WIDTH{1'b0}};
    prev_Rd = {PIX_WIDTH{1'b0}};

    if (!effective_top_row_active) begin
      if (read_bank == 1'b0) begin
        prev_Rb = line_bank0[rb_addr];
        prev_Rc = line_bank0[rc_addr];
        prev_Rd = line_bank0[rd_addr];
      end else begin
        prev_Rb = line_bank1[rb_addr];
        prev_Rc = line_bank1[rc_addr];
        prev_Rd = line_bank1[rd_addr];
      end
    end
  end

  // ---------------------------------------------------------------------
  // Neighborhood selection for the offered pixel.
  // ---------------------------------------------------------------------
  always_comb begin
    left_Ra_for_pixel = left_Ra;

    // At the left edge Ra takes the value of Rb.
    Ra_next = pixel_x_is_left_edge ? prev_Rb : left_Ra_for_pixel;
    Rb_next = prev_Rb;
    Rd_next = prev_Rd;

    // At the left edge Rc comes from the saved extension sample, which is
    // zero on a top row.
    if (!pixel_x_is_left_edge) begin
      Rc_next = prev_Rc;
    end else if (effective_top_row_active) begin
      Rc_next = {PIX_WIDTH{1'b0}};
    end else begin
      Rc_next = left_edge_Rc;
    end

    // Value row_left_Rb takes when this pixel is accepted.
    if (strip_first_pixel) begin
      row_left_Rb_next = {PIX_WIDTH{1'b0}};
    end else if (pixel_x_is_left_edge) begin
      row_left_Rb_next = prev_Rb;
    end else begin
      row_left_Rb_next = row_left_Rb;
    end
  end

  // ---------------------------------------------------------------------
  // Handshakes.
  // ---------------------------------------------------------------------
  always_comb begin
    neigh_slot_open = !neigh_valid || neigh_ready;
    pixel_ready     = neigh_slot_open && !waiting_reconstruct;
    accept_pixel    = pixel_valid && pixel_ready;

    accept_pixel_needs_recon = !lossless_fast_mode;
    accept_pixel_fast_commit = accept_pixel && lossless_fast_mode;

    recon_ready        = waiting_reconstruct || lossless_fast_mode;
    accept_recon       = recon_valid && recon_ready;
    // Only a sample that was actually awaited is written to history.
    accept_recon_write = accept_recon && waiting_reconstruct;
  end

  // ---------------------------------------------------------------------
  // Single write port into the current-row bank: X on a fast commit, or the
  // returned Rx otherwise. The Rx source takes priority in the mux.
  // ---------------------------------------------------------------------
  always_comb begin
    line_write_valid = accept_pixel_fast_commit || accept_recon_write;

    if (accept_recon_write) begin
      line_write_bank   = write_bank;
      line_write_addr   = recon_x;
      line_write_sample = recon_sample;
    end else begin
      line_write_bank   = accept_write_bank;
      line_write_addr   = pixel_x;
      line_write_sample = pixel_sample;
    end
  end

  // ---------------------------------------------------------------------
  // Diagnostic decode only.
  // ---------------------------------------------------------------------
  always_comb begin
    recon_is_row_last         = outstanding_row_last;
    recon_x_plus_one          = recon_x + 13'd1;
    recon_bypass_not_row_last = !recon_is_row_last;
    recon_bypass_strip_ok     = !strip_first_pixel;
    recon_bypass_x_matches    = (pixel_x == recon_x_plus_one);
    recon_bypass_y_matches    = (pixel_y == recon_y);

    same_row_recon_bypass_ready = accept_recon_write &&
                                  recon_bypass_not_row_last &&
                                  recon_bypass_strip_ok &&
                                  recon_bypass_x_matches &&
                                  recon_bypass_y_matches;
  end

  // ---------------------------------------------------------------------
  // Output stage: one registered pixel/neighborhood event.
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin
    if (rst) begin
      neigh_valid             <= 1'b0;
      neigh_sample            <= {PIX_WIDTH{1'b0}};
      neigh_x                 <= 13'd0;
      neigh_y                 <= 13'd0;
      neigh_strip_first_pixel <= 1'b0;
      neigh_strip_last_pixel  <= 1'b0;
      Ra                      <= {PIX_WIDTH{1'b0}};
      Rb                      <= {PIX_WIDTH{1'b0}};
      Rc                      <= {PIX_WIDTH{1'b0}};
      Rd                      <= {PIX_WIDTH{1'b0}};
    end else if (accept_pixel) begin
      neigh_valid             <= 1'b1;
      neigh_sample            <= pixel_sample;
      neigh_x                 <= pixel_x;
      neigh_y                 <= pixel_y;
      neigh_strip_first_pixel <= strip_first_pixel;
      neigh_strip_last_pixel  <= strip_last_pixel;
      Ra                      <= Ra_next;
      Rb                      <= Rb_next;
      Rc                      <= Rc_next;
      Rd                      <= Rd_next;
    end else if (neigh_valid && neigh_ready) begin
      // Event consumed and nothing new accepted: slot becomes empty.
      neigh_valid <= 1'b0;
    end
  end

  // ---------------------------------------------------------------------
  // Row/strip history state. Later assignments in this block intentionally
  // override earlier ones for the same register.
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin
    if (rst) begin
      read_bank            <= 1'b0;
      top_row_active       <= 1'b1;
      waiting_reconstruct  <= 1'b0;
      outstanding_x        <= 13'd0;
      outstanding_y        <= 13'd0;
      outstanding_row_last <= 1'b0;
      left_Ra              <= {PIX_WIDTH{1'b0}};
      left_edge_Rc         <= {PIX_WIDTH{1'b0}};
      row_left_Rb          <= {PIX_WIDTH{1'b0}};
    end else begin
      if (accept_pixel) begin
        waiting_reconstruct  <= accept_pixel_needs_recon;
        outstanding_x        <= pixel_x;
        outstanding_y        <= pixel_y;
        outstanding_row_last <= pixel_x_is_right_edge;
        row_left_Rb          <= row_left_Rb_next;

        if (strip_first_pixel) begin
          // New strip: restart on bank 0 with cleared edge history.
          top_row_active <= 1'b1;
          read_bank      <= 1'b0;
          left_Ra        <= {PIX_WIDTH{1'b0}};
          left_edge_Rc   <= {PIX_WIDTH{1'b0}};
        end

        if (accept_pixel_fast_commit) begin
          left_Ra <= pixel_sample;

          if (pixel_x_is_right_edge) begin
            // Row finished: the bank just written becomes the previous row.
            read_bank      <= accept_write_bank;
            left_Ra        <= {PIX_WIDTH{1'b0}};
            left_edge_Rc   <= row_left_Rb_next;
            top_row_active <= 1'b0;
          end
        end
      end

      if (accept_recon_write) begin
        left_Ra <= recon_sample;

        if (!accept_pixel) begin
          waiting_reconstruct <= 1'b0;
        end

        if (recon_is_row_last) begin
          // Row finished: the bank just written becomes the previous row.
          read_bank      <= write_bank;
          left_Ra        <= {PIX_WIDTH{1'b0}};
          left_edge_Rc   <= row_left_Rb;
          top_row_active <= 1'b0;
        end
      end
    end
  end

  // ---------------------------------------------------------------------
  // Line-bank write port. No write is performed while rst is high.
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin
    if (!rst && line_write_valid) begin
      if (line_write_bank == 1'b0) begin
        line_bank0[line_write_addr] <= line_write_sample;
      end else begin
        line_bank1[line_write_addr] <= line_write_sample;
      end
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
218
fpga/verilog/jls_output_buffer.sv
Normal file
218
fpga/verilog/jls_output_buffer.sv
Normal file
@@ -0,0 +1,218 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.1-C.4 marker stream byte order
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Encoded byte stream delivery after JPEG-LS bit packing
|
||||
// Trace : docs/jls_traceability.md#jls-output-buffer
|
||||
// Example : A byte event {start=1, byte=8'hFF} becomes ofifo_wdata=9'h1FF.
|
||||
//
|
||||
// Internal output buffer for the 9-bit output FIFO interface. The external
|
||||
// ofifo_full/ofifo_alfull inputs are intentionally ignored by RTL behavior per
|
||||
// the SRS; simulation reports an error if a write happens while ofifo_full=1.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_output_buffer #(
|
||||
// Internal output-buffer capacity in bytes. Default comes from the SRS.
|
||||
parameter int OUT_BUF_BYTES = 8192,
|
||||
|
||||
// Input-pause margin in bytes. pause_req asserts when occupancy reaches
|
||||
// OUT_BUF_BYTES - OUT_BUF_AFULL_MARGIN.
|
||||
parameter int OUT_BUF_AFULL_MARGIN = 256
|
||||
) (
|
||||
// Main 250 MHz clock.
|
||||
input var logic clk,
|
||||
|
||||
// Synchronous active-high reset.
|
||||
input var logic rst,
|
||||
|
||||
// Encoded byte event from header writer or bit packer.
|
||||
input var logic byte_valid,
|
||||
|
||||
// This buffer can accept the encoded byte event.
|
||||
output logic byte_ready,
|
||||
|
||||
// JPEG-LS byte in marker-stream order.
|
||||
input var logic [7:0] byte_data,
|
||||
|
||||
// Sideband copied to ofifo_wdata[8] for the original-image first byte only.
|
||||
input var logic original_image_start,
|
||||
|
||||
// Accepted byte event pulse for statistics and dynamic NEAR accounting.
|
||||
output logic byte_accepted,
|
||||
|
||||
// Internal pause request for upstream pipeline throttling.
|
||||
output logic pause_req,
|
||||
|
||||
// Current buffer occupancy in bytes, used by verification reports.
|
||||
output logic [$clog2(OUT_BUF_BYTES + 1)-1:0] buffer_level,
|
||||
|
||||
// Output FIFO write clock, same frequency and phase as clk.
|
||||
output logic ofifo_wclk,
|
||||
|
||||
// Output FIFO write enable.
|
||||
output logic ofifo_wr,
|
||||
|
||||
// Output FIFO data. Bit 8 marks original-image start; bits 7:0 carry bytes.
|
||||
output logic [8:0] ofifo_wdata,
|
||||
|
||||
// Reserved output FIFO full flag. RTL ignores this input for flow control.
|
||||
input var logic ofifo_full,
|
||||
|
||||
// Reserved output FIFO almost-full flag. RTL ignores this input.
|
||||
input var logic ofifo_alfull
|
||||
);
|
||||
|
||||
// Pointer width for the circular byte buffer.
|
||||
localparam int PTR_WIDTH = $clog2(OUT_BUF_BYTES);
|
||||
|
||||
// Occupancy counter width. It must represent OUT_BUF_BYTES exactly.
|
||||
localparam int COUNT_WIDTH = $clog2(OUT_BUF_BYTES + 1);
|
||||
|
||||
// Last legal circular-buffer pointer value.
|
||||
localparam logic [PTR_WIDTH-1:0] PTR_LAST_VALUE = OUT_BUF_BYTES - 1;
|
||||
|
||||
// Buffer capacity and near-full threshold as sized constants.
|
||||
localparam logic [COUNT_WIDTH-1:0] OUT_BUF_BYTES_VALUE = OUT_BUF_BYTES;
|
||||
localparam logic [COUNT_WIDTH-1:0] AFULL_MARGIN_VALUE = OUT_BUF_AFULL_MARGIN;
|
||||
localparam logic [COUNT_WIDTH-1:0] PAUSE_LEVEL_VALUE =
|
||||
OUT_BUF_BYTES - OUT_BUF_AFULL_MARGIN;
|
||||
|
||||
// Circular storage. Bit 8 is original_image_start, bits 7:0 are stream byte.
|
||||
logic [8:0] buffer_mem [0:OUT_BUF_BYTES-1];
|
||||
|
||||
// Circular write and read pointers.
|
||||
logic [PTR_WIDTH-1:0] write_ptr;
|
||||
logic [PTR_WIDTH-1:0] read_ptr;
|
||||
logic [PTR_WIDTH-1:0] write_ptr_next;
|
||||
logic [PTR_WIDTH-1:0] read_ptr_next;
|
||||
|
||||
// Occupancy and status flags.
|
||||
logic [COUNT_WIDTH-1:0] occupancy_count;
|
||||
logic buffer_empty;
|
||||
logic buffer_full;
|
||||
logic push_byte;
|
||||
logic pop_byte;
|
||||
|
||||
// Packed byte event stored in the internal buffer.
|
||||
logic [8:0] buffer_write_word;
|
||||
|
||||
// Reserved input observation signal keeps intent explicit without changing
|
||||
// flow control behavior.
|
||||
logic ofifo_alfull_ignored;
|
||||
|
||||
assign ofifo_wclk = clk;
|
||||
|
||||
always_comb begin
|
||||
buffer_level = occupancy_count;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
buffer_write_word = {original_image_start, byte_data};
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
buffer_empty = 1'b0;
|
||||
if (occupancy_count == {COUNT_WIDTH{1'b0}}) begin
|
||||
buffer_empty = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
buffer_full = 1'b0;
|
||||
if (occupancy_count == OUT_BUF_BYTES_VALUE) begin
|
||||
buffer_full = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
byte_ready = 1'b0;
|
||||
if (!buffer_full) begin
|
||||
byte_ready = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
byte_accepted = 1'b0;
|
||||
if (byte_valid && byte_ready) begin
|
||||
byte_accepted = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
pop_byte = 1'b0;
|
||||
if (!buffer_empty) begin
|
||||
pop_byte = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
push_byte = byte_accepted;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
pause_req = 1'b0;
|
||||
if (occupancy_count >= PAUSE_LEVEL_VALUE) begin
|
||||
pause_req = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
write_ptr_next = write_ptr + {{(PTR_WIDTH-1){1'b0}}, 1'b1};
|
||||
if (write_ptr == PTR_LAST_VALUE) begin
|
||||
write_ptr_next = {PTR_WIDTH{1'b0}};
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
read_ptr_next = read_ptr + {{(PTR_WIDTH-1){1'b0}}, 1'b1};
|
||||
if (read_ptr == PTR_LAST_VALUE) begin
|
||||
read_ptr_next = {PTR_WIDTH{1'b0}};
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
ofifo_alfull_ignored = ofifo_alfull;
|
||||
end
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst) begin
|
||||
write_ptr <= {PTR_WIDTH{1'b0}};
|
||||
read_ptr <= {PTR_WIDTH{1'b0}};
|
||||
occupancy_count <= {COUNT_WIDTH{1'b0}};
|
||||
ofifo_wr <= 1'b0;
|
||||
ofifo_wdata <= 9'd0;
|
||||
end else begin
|
||||
ofifo_wr <= pop_byte;
|
||||
|
||||
if (pop_byte) begin
|
||||
ofifo_wdata <= buffer_mem[read_ptr];
|
||||
read_ptr <= read_ptr_next;
|
||||
end else begin
|
||||
ofifo_wdata <= 9'd0;
|
||||
end
|
||||
|
||||
if (push_byte) begin
|
||||
buffer_mem[write_ptr] <= buffer_write_word;
|
||||
write_ptr <= write_ptr_next;
|
||||
end
|
||||
|
||||
case ({push_byte, pop_byte})
|
||||
2'b10: begin
|
||||
occupancy_count <= occupancy_count + {{(COUNT_WIDTH-1){1'b0}}, 1'b1};
|
||||
end
|
||||
|
||||
2'b01: begin
|
||||
occupancy_count <= occupancy_count - {{(COUNT_WIDTH-1){1'b0}}, 1'b1};
|
||||
end
|
||||
|
||||
default: begin
|
||||
occupancy_count <= occupancy_count;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
`default_nettype wire
|
||||
196
fpga/verilog/jls_prediction_corrector.sv
Normal file
196
fpga/verilog/jls_prediction_corrector.sv
Normal file
@@ -0,0 +1,196 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 prediction error encoding, Annex A.6 bias variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Px correction by C[Q] followed by bounds correction
|
||||
// Trace : docs/jls_traceability.md#regular-prediction-correction
|
||||
// Example : Px=20,C=-3,negative_context=0 gives corrected_Px=17.
|
||||
//
|
||||
// Registered prediction correction stage. It applies context sign to C[Q],
|
||||
// adds the result to the MED prediction Px, and clamps the prediction to
|
||||
// 0..MAXVAL like the JPEG-LS correct_prediction operation.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_prediction_corrector #(
  // Grayscale sample precision in bits. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous active-high reset.
  input var logic rst,

  // Upstream context event handshake: valid from upstream, ready from here.
  input var logic context_valid,
  output logic context_ready,

  // Original input sample X, passed through unchanged.
  input var logic [PIX_WIDTH-1:0] context_sample,

  // Pixel coordinates, passed through unchanged.
  input var logic [12:0] context_x,
  input var logic [12:0] context_y,

  // Strip boundary flags, passed through unchanged.
  input var logic context_strip_first_pixel,
  input var logic context_strip_last_pixel,

  // JPEG-LS MED prediction value Px (uncorrected).
  input var logic [PIX_WIDTH-1:0] Px,

  // Context variables A[Q], B[Q], C[Q], N[Q]. Only C is used arithmetically
  // here (bias correction, signed); A, B and N are forwarded untouched.
  input var logic [31:0] A,
  input var logic signed [31:0] B,
  input var logic signed [8:0] C,
  input var logic [15:0] N,

  // High when the quantized context sign is negative; C is negated then.
  input var logic context_negative,

  // Context index and run-mode flag, forwarded for later stages.
  input var logic [8:0] context_index,
  input var logic run_mode_context,

  // Downstream handshake: valid from here, ready from downstream.
  output logic corrected_valid,
  input var logic corrected_ready,

  // Registered copies of the forwarded event fields.
  output logic [PIX_WIDTH-1:0] corrected_sample,
  output logic [12:0] corrected_x,
  output logic [12:0] corrected_y,
  output logic corrected_strip_first_pixel,
  output logic corrected_strip_last_pixel,

  // Px + (+/-)C, clamped to 0..MAXVAL.
  output logic [PIX_WIDTH-1:0] corrected_Px,

  // Registered context metadata.
  output logic [8:0] corrected_context_index,
  output logic corrected_context_negative,
  output logic corrected_run_mode_context,

  // Registered, unmodified context variables for the context-update stage.
  output logic [31:0] corrected_A,
  output logic signed [31:0] corrected_B,
  output logic signed [8:0] corrected_C,
  output logic [15:0] corrected_N
);

  // 19 signed bits hold a 16-bit MAXVAL plus or minus the 9-bit C term.
  localparam logic signed [18:0] MAXVAL_VALUE = (19'sd1 <<< PIX_WIDTH) - 19'sd1;

  // Working values of the correction datapath.
  logic signed [18:0] bias_wide;          // C sign-extended to working width
  logic signed [18:0] bias_oriented;      // C with the context sign applied
  logic signed [18:0] px_wide;            // Px zero-extended to working width
  logic signed [18:0] biased_prediction;  // Px + oriented C, before clamping
  logic [PIX_WIDTH-1:0] clamped_prediction;

  // High in the cycle an input event is taken into the output register.
  logic load_output;

  // C is declared signed, so this assignment sign-extends it.
  assign bias_wide         = C;
  assign bias_oriented     = context_negative ? -bias_wide : bias_wide;
  assign px_wide           = $signed({3'd0, Px});
  assign biased_prediction = px_wide + bias_oriented;

  // Clamp to 0..MAXVAL. The sign bit alone identifies a negative sum.
  always_comb begin
    if (biased_prediction[18]) begin
      clamped_prediction = {PIX_WIDTH{1'b0}};
    end else if (biased_prediction > MAXVAL_VALUE) begin
      clamped_prediction = MAXVAL_VALUE[PIX_WIDTH-1:0];
    end else begin
      clamped_prediction = biased_prediction[PIX_WIDTH-1:0];
    end
  end

  // Single-slot register: free when empty or being drained this cycle.
  assign context_ready = !corrected_valid || corrected_ready;
  assign load_output   = context_valid && context_ready;

  always_ff @(posedge clk) begin
    if (rst) begin
      corrected_valid             <= 1'b0;
      corrected_sample            <= {PIX_WIDTH{1'b0}};
      corrected_x                 <= 13'd0;
      corrected_y                 <= 13'd0;
      corrected_strip_first_pixel <= 1'b0;
      corrected_strip_last_pixel  <= 1'b0;
      corrected_Px                <= {PIX_WIDTH{1'b0}};
      corrected_context_index     <= 9'd0;
      corrected_context_negative  <= 1'b0;
      corrected_run_mode_context  <= 1'b0;
      corrected_A                 <= 32'd0;
      corrected_B                 <= 32'sd0;
      corrected_C                 <= 9'sd0;
      corrected_N                 <= 16'd0;
    end else if (load_output) begin
      // A new event replaces whatever was (or was not) in the slot.
      corrected_valid             <= 1'b1;
      corrected_sample            <= context_sample;
      corrected_x                 <= context_x;
      corrected_y                 <= context_y;
      corrected_strip_first_pixel <= context_strip_first_pixel;
      corrected_strip_last_pixel  <= context_strip_last_pixel;
      corrected_Px                <= clamped_prediction;
      corrected_context_index     <= context_index;
      corrected_context_negative  <= context_negative;
      corrected_run_mode_context  <= run_mode_context;
      corrected_A                 <= A;
      corrected_B                 <= B;
      corrected_C                 <= C;
      corrected_N                 <= N;
    end else if (corrected_ready) begin
      // Drained without a refill; payload registers keep their old contents.
      corrected_valid <= 1'b0;
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
273
fpga/verilog/jls_predictor.sv
Normal file
273
fpga/verilog/jls_predictor.sv
Normal file
@@ -0,0 +1,273 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.4 prediction, Annex G.1 regular-mode variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : MED predictor / Px calculation from Ra, Rb, and Rc
|
||||
// Trace : docs/jls_traceability.md#med-predictor
|
||||
// Example : If Ra=10, Rb=20, Rc=15, Px=Ra+Rb-Rc=15.
|
||||
//
|
||||
// Registered MED predictor stage. A separate line-buffer stage supplies the
|
||||
// reconstructed neighbors Ra/Rb/Rc/Rd. This split keeps the neighbor memory
|
||||
// path independent from the MED compare/add path for the 250 MHz target.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_predictor #(
  // Grayscale sample precision in bits. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous active-high reset.
  input var logic rst,

  // Input pixel/neighborhood handshake: valid from upstream, ready from here.
  input var logic pixel_valid,
  output logic pixel_ready,

  // Original input sample X.
  input var logic [PIX_WIDTH-1:0] pixel_sample,

  // Zero-based original-image column and row coordinates.
  input var logic [12:0] pixel_x,
  input var logic [12:0] pixel_y,

  // First / last pixel markers of the current standalone strip frame.
  input var logic strip_first_pixel,
  input var logic strip_last_pixel,

  // Reconstructed neighbors: left, above, above-left, above-right of X.
  // Rd does not take part in the MED computation; it is only forwarded.
  input var logic [PIX_WIDTH-1:0] Ra,
  input var logic [PIX_WIDTH-1:0] Rb,
  input var logic [PIX_WIDTH-1:0] Rc,
  input var logic [PIX_WIDTH-1:0] Rd,

  // Output handshake: valid from here, ready from the downstream stage.
  output logic predict_valid,
  input var logic predict_ready,

  // Forwarded sample, coordinates and strip flags of the head event.
  output logic [PIX_WIDTH-1:0] predict_sample,
  output logic [12:0] predict_x,
  output logic [12:0] predict_y,
  output logic predict_strip_first_pixel,
  output logic predict_strip_last_pixel,

  // Forwarded reconstructed neighbors of the head event.
  output logic [PIX_WIDTH-1:0] predict_Ra,
  output logic [PIX_WIDTH-1:0] predict_Rb,
  output logic [PIX_WIDTH-1:0] predict_Rc,
  output logic [PIX_WIDTH-1:0] predict_Rd,

  // JPEG-LS MED prediction value of the head event.
  output logic [PIX_WIDTH-1:0] Px
);

  // Everything that travels with one prediction event.
  typedef struct packed {
    logic [PIX_WIDTH-1:0] sample;
    logic [12:0]          col;
    logic [12:0]          row;
    logic                 strip_first;
    logic                 strip_last;
    logic [PIX_WIDTH-1:0] ra;
    logic [PIX_WIDTH-1:0] rb;
    logic [PIX_WIDTH-1:0] rc;
    logic [PIX_WIDTH-1:0] rd;
    logic [PIX_WIDTH-1:0] med;
  } med_event_t;

  // MED datapath.
  logic [PIX_WIDTH-1:0] neighbor_lo;   // min(Ra, Rb)
  logic [PIX_WIDTH-1:0] neighbor_hi;   // max(Ra, Rb)
  logic [PIX_WIDTH:0]   planar_sum;    // Ra + Rb - Rc with one guard bit
  logic [PIX_WIDTH-1:0] med_value;

  // Two-entry output queue: head_event backs the public predict_* ports,
  // spill_event holds one more event while the downstream stage is stalled.
  med_event_t incoming_event;
  med_event_t head_event;
  med_event_t spill_event;
  logic       spill_valid;

  // Handshake terms.
  logic accept_pixel;    // input event taken this cycle
  logic head_drained;    // head event consumed downstream this cycle
  logic head_loadable;   // head slot empty or being drained

  // ---- MED predictor -----------------------------------------------------
  assign neighbor_lo = (Ra >= Rb) ? Rb : Ra;
  assign neighbor_hi = (Ra >= Rb) ? Ra : Rb;
  assign planar_sum  = {1'b0, Ra} + {1'b0, Rb} - {1'b0, Rc};

  // Rc at or above the larger neighbor selects the smaller one, Rc at or
  // below the smaller neighbor selects the larger one, otherwise Ra+Rb-Rc.
  always_comb begin
    if (Rc >= neighbor_hi) begin
      med_value = neighbor_lo;
    end else if (Rc <= neighbor_lo) begin
      med_value = neighbor_hi;
    end else begin
      med_value = planar_sum[PIX_WIDTH-1:0];
    end
  end

  // ---- Event bundling ----------------------------------------------------
  always_comb begin
    incoming_event.sample      = pixel_sample;
    incoming_event.col         = pixel_x;
    incoming_event.row         = pixel_y;
    incoming_event.strip_first = strip_first_pixel;
    incoming_event.strip_last  = strip_last_pixel;
    incoming_event.ra          = Ra;
    incoming_event.rb          = Rb;
    incoming_event.rc          = Rc;
    incoming_event.rd          = Rd;
    incoming_event.med         = med_value;
  end

  assign predict_sample            = head_event.sample;
  assign predict_x                 = head_event.col;
  assign predict_y                 = head_event.row;
  assign predict_strip_first_pixel = head_event.strip_first;
  assign predict_strip_last_pixel  = head_event.strip_last;
  assign predict_Ra                = head_event.ra;
  assign predict_Rb                = head_event.rb;
  assign predict_Rc                = head_event.rc;
  assign predict_Rd                = head_event.rd;
  assign Px                        = head_event.med;

  // ---- Handshake ---------------------------------------------------------
  // Input is refused only while both queue entries are occupied.
  assign pixel_ready   = !(predict_valid && spill_valid);
  assign accept_pixel  = pixel_valid && pixel_ready;
  assign head_drained  = predict_valid && predict_ready;
  assign head_loadable = !predict_valid || head_drained;

  // ---- Queue registers ---------------------------------------------------
  always_ff @(posedge clk) begin
    if (rst) begin
      predict_valid <= 1'b0;
      spill_valid   <= 1'b0;
      head_event    <= '0;
      spill_event   <= '0;
    end else begin
      // Head slot. When the queue is full no input is accepted, so the
      // spill-to-head move never competes with a direct input load.
      if (head_drained && spill_valid) begin
        head_event    <= spill_event;
        predict_valid <= 1'b1;
        spill_valid   <= 1'b0;
      end else if (accept_pixel && head_loadable) begin
        head_event    <= incoming_event;
        predict_valid <= 1'b1;
      end else if (head_drained) begin
        predict_valid <= 1'b0;
      end

      // Spill slot: used only when the head is occupied and not draining.
      if (accept_pixel && !head_loadable) begin
        spill_event <= incoming_event;
        spill_valid <= 1'b1;
      end
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
138
fpga/verilog/jls_preset_defaults.sv
Normal file
138
fpga/verilog/jls_preset_defaults.sv
Normal file
@@ -0,0 +1,138 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.2.4.1.1 preset coding parameters
|
||||
// Figure : C.3 clamping function, referenced by default threshold rules
|
||||
// Table : Table C.1 valid preset parameters, Table C.2 RESET, Table C.3 defaults
|
||||
// Pseudocode : Default threshold calculation for MAXVAL >= 128
|
||||
// Trace : docs/jls_traceability.md#jls-preset-defaults
|
||||
// Example : PIX_WIDTH=8, NEAR=0 gives MAXVAL=255, T1=3, T2=7, T3=21.
|
||||
//
|
||||
// JPEG-LS default preset coding parameter helper. The first RTL version only
|
||||
// supports 8/10/12/14/16-bit grayscale samples and NEAR is clamped to 0..31.
|
||||
// For all supported sample precisions MAXVAL >= 128. With NEAR <= 31 the
|
||||
// default thresholds do not hit MAXVAL, so the standard C.2.4.1.1 equations
|
||||
// reduce to shallow shift-add expressions:
|
||||
// T1 = FACTOR * 1 + 2 + 3*NEAR
|
||||
// T2 = FACTOR * 4 + 3 + 5*NEAR
|
||||
// T3 = FACTOR * 17 + 4 + 7*NEAR
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_preset_defaults #(
  // Grayscale sample precision in bits. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16
) (
  // Requested NEAR value. Anything above 31 is treated as 31.
  input var logic [5:0] near,

  // JPEG-LS LSE MAXVAL preset coding parameter.
  output logic [15:0] preset_maxval,

  // JPEG-LS LSE T1, T2 and T3 preset coding parameters.
  output logic [15:0] preset_t1,
  output logic [15:0] preset_t2,
  output logic [15:0] preset_t3,

  // JPEG-LS LSE RESET preset coding parameter.
  output logic [15:0] preset_reset
);

  // Default RESET value.
  localparam logic [15:0] DEFAULT_RESET_VALUE = 16'd64;

  // Largest NEAR this project supports.
  localparam logic [5:0] NEAR_LIMIT = 6'd31;

  // MAXVAL per sample precision. Any PIX_WIDTH outside 8/10/12/14 takes the
  // 16-bit value.
  localparam logic [15:0] MAXVAL_CONST =
      (PIX_WIDTH == 8)  ? 16'd255   :
      (PIX_WIDTH == 10) ? 16'd1023  :
      (PIX_WIDTH == 12) ? 16'd4095  :
      (PIX_WIDTH == 14) ? 16'd16383 : 16'hFFFF;

  // FACTOR term of the default threshold equations: 1 for 8-bit, 4 for
  // 10-bit, and 16 for every other PIX_WIDTH.
  localparam logic [15:0] FACTOR_CONST =
      (PIX_WIDTH == 8)  ? 16'd1 :
      (PIX_WIDTH == 10) ? 16'd4 : 16'd16;

  // NEAR-independent part of each threshold:
  //   T1 base = FACTOR*1 + 2, T2 base = FACTOR*4 + 3, T3 base = FACTOR*17 + 4
  localparam logic [15:0] BASE_T1_CONST = FACTOR_CONST + 16'd2;
  localparam logic [15:0] BASE_T2_CONST = (FACTOR_CONST << 2) + 16'd3;
  localparam logic [15:0] BASE_T3_CONST = (FACTOR_CONST << 4) + FACTOR_CONST + 16'd4;

  // Clamped NEAR, zero-extended to the 16-bit output width.
  logic [15:0] near_wide;

  assign near_wide = (near > NEAR_LIMIT) ? {10'd0, NEAR_LIMIT} : {10'd0, near};

  assign preset_maxval = MAXVAL_CONST;

  // NEAR contributions are 3*NEAR, 5*NEAR and 7*NEAR, built from shifts.
  assign preset_t1 = BASE_T1_CONST + (near_wide << 1) + near_wide;
  assign preset_t2 = BASE_T2_CONST + (near_wide << 2) + near_wide;
  assign preset_t3 = BASE_T3_CONST + (near_wide << 3) - near_wide;

  assign preset_reset = DEFAULT_RESET_VALUE;

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
805
fpga/verilog/jls_regular_error_quantizer.sv
Normal file
805
fpga/verilog/jls_regular_error_quantizer.sv
Normal file
@@ -0,0 +1,805 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 prediction error encoding, Annex A.2 RANGE
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Errval quantization/modulo and reconstructed sample computation
|
||||
// Trace : docs/jls_traceability.md#regular-error-quantization
|
||||
// Example : X=24, Px=20, NEAR=1 gives Errval=1 and Rx=23.
|
||||
//
|
||||
// Regular-mode error quantizer and reconstructed-sample calculator. NEAR>0
|
||||
// uses an exact reciprocal-LUT multiply and correction pipeline: one cycle for
|
||||
// the reciprocal multiply, one cycle for the quotient correction, then the
|
||||
// standard Annex A.5 modulo/reconstruction result. This avoids a large
|
||||
// combinational divider while reducing the earlier one-bit-per-cycle latency.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_regular_error_quantizer #(
|
||||
// Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
|
||||
parameter int PIX_WIDTH = 16
|
||||
) (
|
||||
// Main 250 MHz clock.
|
||||
input var logic clk,
|
||||
|
||||
// Synchronous active-high reset.
|
||||
input var logic rst,
|
||||
|
||||
// Corrected prediction event is valid.
|
||||
input var logic corrected_valid,
|
||||
|
||||
// This stage can accept the current event.
|
||||
output logic corrected_ready,
|
||||
|
||||
// Original input sample X.
|
||||
input var logic [PIX_WIDTH-1:0] corrected_sample,
|
||||
|
||||
// Pixel coordinate forwarded for line-buffer writeback/reporting.
|
||||
input var logic [12:0] corrected_x,
|
||||
input var logic [12:0] corrected_y,
|
||||
|
||||
// Strip boundary flags forwarded with the result.
|
||||
input var logic corrected_strip_first_pixel,
|
||||
input var logic corrected_strip_last_pixel,
|
||||
|
||||
// Corrected prediction value after C[Q] and bounds correction.
|
||||
input var logic [PIX_WIDTH-1:0] corrected_Px,
|
||||
|
||||
// Forwarded context metadata.
|
||||
input var logic [8:0] corrected_context_index,
|
||||
input var logic corrected_context_negative,
|
||||
input var logic corrected_run_mode_context,
|
||||
|
||||
// Pre-update context variables forwarded from jls_context_model.
|
||||
input var logic [31:0] corrected_A,
|
||||
input var logic signed [31:0] corrected_B,
|
||||
input var logic signed [8:0] corrected_C,
|
||||
input var logic [15:0] corrected_N,
|
||||
|
||||
// Coding parameters for the current strip frame.
|
||||
input var logic [16:0] RANGE,
|
||||
input var logic [4:0] qbpp,
|
||||
input var logic [6:0] LIMIT,
|
||||
input var logic [5:0] NEAR,
|
||||
|
||||
// Quantized error event is valid.
|
||||
output logic err_valid,
|
||||
|
||||
// Downstream context-update stage accepted the event.
|
||||
input var logic err_ready,
|
||||
|
||||
// Quantized signed prediction error Errval after context sign handling.
|
||||
output logic signed [31:0] Errval,
|
||||
|
||||
// Reconstructed sample Rx used by the line-buffer stage.
|
||||
output logic [PIX_WIDTH-1:0] reconstructed_sample,
|
||||
|
||||
// Forwarded coordinate and strip flags.
|
||||
output logic [12:0] err_x,
|
||||
output logic [12:0] err_y,
|
||||
output logic err_strip_first_pixel,
|
||||
output logic err_strip_last_pixel,
|
||||
|
||||
// Forwarded context and coding metadata.
|
||||
output logic [8:0] err_context_index,
|
||||
output logic err_context_negative,
|
||||
output logic err_run_mode_context,
|
||||
output logic [4:0] err_qbpp,
|
||||
output logic [6:0] err_LIMIT,
|
||||
|
||||
// Forwarded pre-update context variables for jls_context_update.
|
||||
output logic [31:0] err_A,
|
||||
output logic signed [31:0] err_B,
|
||||
output logic signed [8:0] err_C,
|
||||
output logic [15:0] err_N
|
||||
);
|
||||
|
||||
// Divider width covers max numerator MAXVAL + NEAR.
|
||||
localparam int DIV_WIDTH = PIX_WIDTH + 1;
|
||||
localparam int RECIP_SHIFT = 24;
|
||||
localparam int RECIP_MAGIC_WIDTH = 23;
|
||||
localparam int RECIP_PRODUCT_WIDTH = DIV_WIDTH + RECIP_MAGIC_WIDTH;
|
||||
localparam int RECIP_CHECK_WIDTH = DIV_WIDTH + 6;
|
||||
|
||||
// State for the exact reciprocal-LUT division pipeline when NEAR > 0.
|
||||
typedef enum logic [3:0] {
|
||||
STATE_IDLE = 4'd0,
|
||||
STATE_DIV_MUL = 4'd1,
|
||||
STATE_DIV_CHECK = 4'd2,
|
||||
STATE_DIV_CORRECT = 4'd3,
|
||||
STATE_ERRVAL = 4'd4,
|
||||
STATE_RECON_MUL = 4'd5,
|
||||
STATE_RECON_SUM = 4'd6,
|
||||
STATE_RECON_CALC = 4'd7,
|
||||
STATE_RECON_CLAMP = 4'd8,
|
||||
STATE_FINISH = 4'd9,
|
||||
STATE_INPUT_PREP = 4'd10,
|
||||
STATE_ERRVAL_SIGN = 4'd11,
|
||||
STATE_NUMERATOR_PREP = 4'd12,
|
||||
STATE_RECON_FACTORS = 4'd13,
|
||||
STATE_ERRVAL_PREP = 4'd14
|
||||
} quant_state_e;
|
||||
|
||||
// One-hot state decode keeps per-stage enables shallow. This is important
|
||||
// when explicit timing-boundary registers below are preserved for 250 MHz.
|
||||
(* fsm_encoding = "one_hot" *) quant_state_e state;
|
||||
|
||||
// Latched event fields.
|
||||
logic [PIX_WIDTH-1:0] sample_latched;
|
||||
logic [PIX_WIDTH-1:0] Px_latched;
|
||||
logic [12:0] x_latched;
|
||||
logic [12:0] y_latched;
|
||||
logic strip_first_latched;
|
||||
logic strip_last_latched;
|
||||
logic [8:0] context_index_latched;
|
||||
logic context_negative_latched;
|
||||
logic run_mode_latched;
|
||||
logic [31:0] A_latched;
|
||||
logic signed [31:0] B_latched;
|
||||
logic signed [8:0] C_latched;
|
||||
logic [15:0] N_latched;
|
||||
logic [16:0] RANGE_latched;
|
||||
logic [4:0] qbpp_latched;
|
||||
logic [6:0] LIMIT_latched;
|
||||
logic [5:0] NEAR_latched;
|
||||
logic signed [32:0] oriented_error_latched;
|
||||
logic quotient_negative_latched;
|
||||
|
||||
// Reciprocal-division registers and combinational next values.
|
||||
logic [DIV_WIDTH-1:0] div_dividend;
|
||||
logic [DIV_WIDTH-1:0] div_quotient;
|
||||
logic [5:0] div_denominator;
|
||||
logic [RECIP_MAGIC_WIDTH-1:0] div_magic;
|
||||
logic [RECIP_PRODUCT_WIDTH-1:0] div_product;
|
||||
logic [5:0] divisor_small_next;
|
||||
logic [RECIP_MAGIC_WIDTH-1:0] reciprocal_magic_next;
|
||||
logic [RECIP_PRODUCT_WIDTH-1:0] div_dividend_product_ext;
|
||||
logic [RECIP_PRODUCT_WIDTH-1:0] div_magic_product_ext;
|
||||
logic [RECIP_PRODUCT_WIDTH-1:0] div_product_next;
|
||||
logic [DIV_WIDTH-1:0] recip_quotient_est;
|
||||
logic [RECIP_CHECK_WIDTH-1:0] recip_quotient_est_ext;
|
||||
logic [RECIP_CHECK_WIDTH-1:0] recip_divisor_ext;
|
||||
logic [RECIP_CHECK_WIDTH-1:0] recip_check_product;
|
||||
logic [RECIP_CHECK_WIDTH-1:0] recip_dividend_ext;
|
||||
logic [DIV_WIDTH-1:0] recip_quotient_est_latched;
|
||||
logic [RECIP_CHECK_WIDTH-1:0] recip_check_product_latched;
|
||||
logic [RECIP_CHECK_WIDTH-1:0] recip_dividend_ext_latched;
|
||||
logic [DIV_WIDTH-1:0] recip_quotient_corrected;
|
||||
|
||||
// Input arithmetic.
|
||||
logic signed [32:0] sample_ext;
|
||||
logic signed [32:0] Px_ext;
|
||||
logic signed [32:0] sample_minus_px;
|
||||
logic signed [32:0] oriented_error_next;
|
||||
logic signed [32:0] neg_oriented_error_next;
|
||||
logic quotient_negative_next;
|
||||
logic [DIV_WIDTH-1:0] division_numerator_next;
|
||||
logic signed [32:0] division_numerator_positive;
|
||||
logic signed [32:0] division_numerator_negative;
|
||||
// Result arithmetic.
|
||||
logic signed [32:0] quotient_signed;
|
||||
logic signed [32:0] raw_Errval;
|
||||
logic signed [32:0] range_ext;
|
||||
logic signed [32:0] range_midpoint_ext;
|
||||
logic signed [32:0] modulo_Errval_after_add;
|
||||
logic signed [32:0] modulo_Errval_after_add_latched;
|
||||
logic signed [32:0] modulo_Errval;
|
||||
logic signed [32:0] sign_restored_Errval;
|
||||
logic signed [32:0] modulo_Errval_latched;
|
||||
// Timing boundary between Annex A.5 modulo/sign restoration and
|
||||
// reconstructed-sample dequantization. This register remains as a visible
|
||||
// pipeline stage for the odd-scale carry-chain multiplier used below.
|
||||
logic signed [32:0] sign_restored_Errval_latched;
|
||||
logic signed [32:0] sign_restored_mul_latched;
|
||||
logic signed [6:0] near_scale_latched;
|
||||
logic signed [40:0] dequantized_error;
|
||||
logic signed [40:0] dequantized_error_latched;
|
||||
logic signed [40:0] reconstruction_base;
|
||||
logic signed [40:0] reconstruction_base_latched;
|
||||
logic signed [40:0] reconstruction_sum;
|
||||
logic signed [40:0] reconstruction_sum_latched;
|
||||
logic signed [40:0] range_scaled;
|
||||
logic signed [40:0] range_scaled_latched;
|
||||
logic signed [40:0] reconstruction_fixed;
|
||||
logic signed [40:0] reconstruction_fixed_latched;
|
||||
logic signed [40:0] maxval_ext;
|
||||
logic signed [40:0] maxval_ext_latched;
|
||||
logic signed [40:0] near_ext;
|
||||
logic signed [40:0] near_ext_latched;
|
||||
logic signed [40:0] maxval_plus_near_latched;
|
||||
logic signed [40:0] negative_near_latched;
|
||||
logic [PIX_WIDTH-1:0] reconstructed_next;
|
||||
logic [PIX_WIDTH-1:0] reconstructed_calc_latched;
|
||||
|
||||
// Handshake and acceptance terms.
|
||||
logic output_slot_open;
|
||||
logic accept_corrected;
|
||||
|
||||
// Shared odd-scale multipliers for Annex A.5 reconstruction terms.
|
||||
// Dequantization product for Annex A.5 reconstruction: the registered,
// sign-restored error value times the registered odd scale (2*NEAR+1).
jls_near_scale_mul #(
  .OUTPUT_WIDTH(41),
  .INPUT_WIDTH (33)
) regular_recon_err_mul_i (
  .near_scale_i  (near_scale_latched[5:0]),
  .multiplicand_i(sign_restored_mul_latched),
  .product_o     (dequantized_error)
);

// Wrap distance for Annex A.5 reconstruction: RANGE times the same odd scale.
// RANGE is zero-extended by one bit so the signed multiplicand stays
// non-negative.
jls_near_scale_mul #(
  .OUTPUT_WIDTH(41),
  .INPUT_WIDTH (18)
) regular_recon_range_mul_i (
  .near_scale_i  (near_scale_latched[5:0]),
  .multiplicand_i($signed({1'b0, RANGE_latched})),
  .product_o     (range_scaled)
);
|
||||
|
||||
// Output register availability: the slot is open when no result is pending
// (err_valid low) or when the pending result is taken this cycle (err_ready
// high).
always_comb begin
  if (!err_valid || err_ready) begin
    output_slot_open = 1'b1;
  end else begin
    output_slot_open = 1'b0;
  end
end

// Input-side ready. Acceptance does not look at err_ready: the state machine
// spends several cycles before STATE_FINISH, so an earlier pending err_valid
// can drain while the next pixel is processed. If it has not drained by then,
// STATE_FINISH waits on output_slot_open and holds the new result.
always_comb begin
  if (state == STATE_IDLE) begin
    corrected_ready = 1'b1;
  end else begin
    corrected_ready = 1'b0;
  end
end

// Transfer strobe for one corrected-prediction event (valid and ready both
// high in the same cycle).
always_comb begin
  if (corrected_valid && corrected_ready) begin
    accept_corrected = 1'b1;
  end else begin
    accept_corrected = 1'b0;
  end
end
|
||||
|
||||
// Prediction error Ix - Px on zero-extended, signed operands, followed by the
// context sign orientation (negated when the context is marked negative).
always_comb begin
  Px_ext          = $signed({17'd0, Px_latched});
  sample_ext      = $signed({17'd0, sample_latched});
  sample_minus_px = sample_ext - Px_ext;

  if (context_negative_latched) begin
    oriented_error_next = -sample_minus_px;
  end else begin
    oriented_error_next = sample_minus_px;
  end
end

// Negated copy of the registered oriented error, used to form |Errval| for
// non-positive errors.
always_comb begin
  neg_oriented_error_next = -oriented_error_latched;
end

// Division numerator |Errval| + NEAR and the sign of the resulting quotient.
// A strictly positive error uses Errval + NEAR with a positive quotient; zero
// and negative errors use -Errval + NEAR with the quotient flagged negative.
always_comb begin
  division_numerator_negative = neg_oriented_error_next + $signed({27'd0, NEAR_latched});
  division_numerator_positive = oriented_error_latched + $signed({27'd0, NEAR_latched});

  if (oriented_error_latched > 33'sd0) begin
    quotient_negative_next  = 1'b0;
    division_numerator_next = division_numerator_positive[DIV_WIDTH-1:0];
  end else begin
    quotient_negative_next  = 1'b1;
    division_numerator_next = division_numerator_negative[DIV_WIDTH-1:0];
  end
end
|
||||
|
||||
// Small divisor 2*NEAR+1, built from the low five NEAR bits with the LSB
// forced to one.
always_comb begin
  divisor_small_next = {NEAR_latched[4:0], 1'b1};
end

// Reciprocal constant lookup, indexed by the low five NEAR bits. Each entry
// is ceil(2^24 / (2*NEAR+1)) for NEAR = 1..31. Rounding the constant up can
// make the quotient estimate one too large; the later correction stage
// detects that by testing q*d > n. Index 0 (which includes NEAR == 0, where
// the state machine bypasses the divider) yields zero.
// NOTE(review): the 2^24 scaling presumably matches RECIP_SHIFT - confirm.
always_comb begin
  case (NEAR_latched[4:0])
    5'd1:    reciprocal_magic_next = 23'd5592406;
    5'd2:    reciprocal_magic_next = 23'd3355444;
    5'd3:    reciprocal_magic_next = 23'd2396746;
    5'd4:    reciprocal_magic_next = 23'd1864136;
    5'd5:    reciprocal_magic_next = 23'd1525202;
    5'd6:    reciprocal_magic_next = 23'd1290556;
    5'd7:    reciprocal_magic_next = 23'd1118482;
    5'd8:    reciprocal_magic_next = 23'd986896;
    5'd9:    reciprocal_magic_next = 23'd883012;
    5'd10:   reciprocal_magic_next = 23'd798916;
    5'd11:   reciprocal_magic_next = 23'd729445;
    5'd12:   reciprocal_magic_next = 23'd671089;
    5'd13:   reciprocal_magic_next = 23'd621379;
    5'd14:   reciprocal_magic_next = 23'd578525;
    5'd15:   reciprocal_magic_next = 23'd541201;
    5'd16:   reciprocal_magic_next = 23'd508401;
    5'd17:   reciprocal_magic_next = 23'd479350;
    5'd18:   reciprocal_magic_next = 23'd453439;
    5'd19:   reciprocal_magic_next = 23'd430186;
    5'd20:   reciprocal_magic_next = 23'd409201;
    5'd21:   reciprocal_magic_next = 23'd390168;
    5'd22:   reciprocal_magic_next = 23'd372828;
    5'd23:   reciprocal_magic_next = 23'd356963;
    5'd24:   reciprocal_magic_next = 23'd342393;
    5'd25:   reciprocal_magic_next = 23'd328966;
    5'd26:   reciprocal_magic_next = 23'd316552;
    5'd27:   reciprocal_magic_next = 23'd305041;
    5'd28:   reciprocal_magic_next = 23'd294338;
    5'd29:   reciprocal_magic_next = 23'd284360;
    5'd30:   reciprocal_magic_next = 23'd275037;
    5'd31:   reciprocal_magic_next = 23'd266306;
    default: reciprocal_magic_next = 23'd0;
  endcase
end
|
||||
|
||||
// Reciprocal multiply: both operands are zero-extended to the product width
// and then multiplied; the result is registered as div_product.
always_comb begin
  div_magic_product_ext    = {{DIV_WIDTH{1'b0}}, div_magic};
  div_dividend_product_ext = {{RECIP_MAGIC_WIDTH{1'b0}}, div_dividend};
  div_product_next         = div_dividend_product_ext * div_magic_product_ext;
end

// Quotient estimate: registered product scaled back down by RECIP_SHIFT.
always_comb begin
  recip_quotient_est = div_product >> RECIP_SHIFT;
end

// Check operands widened to the check-product width, and the check product
// q*d that is compared against the dividend one stage later.
always_comb begin
  recip_dividend_ext     = {{6{1'b0}}, div_dividend};
  recip_divisor_ext      = {{DIV_WIDTH{1'b0}}, div_denominator};
  recip_quotient_est_ext = {{6{1'b0}}, recip_quotient_est};
  recip_check_product    = recip_quotient_est_ext * recip_divisor_ext;
end

// Final quotient: subtract one from the registered estimate when the
// registered q*d exceeds the registered dividend.
always_comb begin
  if (recip_check_product_latched > recip_dividend_ext_latched) begin
    recip_quotient_corrected = recip_quotient_est_latched - {{(DIV_WIDTH-1){1'b0}}, 1'b1};
  end else begin
    recip_quotient_corrected = recip_quotient_est_latched;
  end
end
|
||||
|
||||
// Quantized error with its sign re-applied: the unsigned quotient is
// zero-extended and negated when the numerator stage flagged it negative.
always_comb begin
  if (quotient_negative_latched) begin
    quotient_signed = -$signed({16'd0, div_quotient});
  end else begin
    quotient_signed = $signed({16'd0, div_quotient});
  end

  raw_Errval = quotient_signed;
end

// RANGE and its midpoint (RANGE + 1) / 2 as signed comparison operands.
always_comb begin
  range_midpoint_ext = $signed({16'd0, ((RANGE_latched + 17'd1) >> 1)});
  range_ext          = $signed({16'd0, RANGE_latched});
end

// Modulo reduction, first step: a negative error is lifted by RANGE.
always_comb begin
  if (raw_Errval < 33'sd0) begin
    modulo_Errval_after_add = raw_Errval + range_ext;
  end else begin
    modulo_Errval_after_add = raw_Errval;
  end
end

// Modulo reduction, second step: a value at or above the midpoint is lowered
// by RANGE. Operates on the registered result of the first step.
always_comb begin
  if (modulo_Errval_after_add_latched >= range_midpoint_ext) begin
    modulo_Errval = modulo_Errval_after_add_latched - range_ext;
  end else begin
    modulo_Errval = modulo_Errval_after_add_latched;
  end
end

// Undo the context sign orientation on the reduced error; this value feeds
// the reconstruction multiplier.
always_comb begin
  if (context_negative_latched) begin
    sign_restored_Errval = -modulo_Errval_latched;
  end else begin
    sign_restored_Errval = modulo_Errval_latched;
  end
end
|
||||
|
||||
// Annex A.5 reconstruction base: Px zero-extended to the 41-bit signed
// datapath. The scaled error and scaled RANGE come from the shared odd-scale
// multiplier instances and are registered in STATE_RECON_MUL.
always_comb begin
  reconstruction_base = $signed({25'd0, Px_latched});
end

// Px + Errval * (2*NEAR+1), formed from the registered operands.
always_comb begin
  reconstruction_sum = reconstruction_base_latched + dequantized_error_latched;
end

// Clamp/wrap constants: the largest PIX_WIDTH-bit sample value and NEAR, both
// widened to the reconstruction datapath.
always_comb begin
  near_ext   = $signed({35'd0, NEAR_latched});
  maxval_ext = (41'sd1 <<< PIX_WIDTH) - 41'sd1;
end

// Range wrap: a sum below -NEAR is raised by the scaled RANGE, a sum above
// MAXVAL + NEAR is lowered by it, and anything in between passes through.
always_comb begin
  if (reconstruction_sum_latched < negative_near_latched) begin
    reconstruction_fixed = reconstruction_sum_latched + range_scaled_latched;
  end else if (reconstruction_sum_latched > maxval_plus_near_latched) begin
    reconstruction_fixed = reconstruction_sum_latched - range_scaled_latched;
  end else begin
    reconstruction_fixed = reconstruction_sum_latched;
  end
end

// Final clamp of the wrapped value into [0, MAXVAL].
always_comb begin
  if (reconstruction_fixed_latched < 41'sd0) begin
    reconstructed_next = {PIX_WIDTH{1'b0}};
  end else if (reconstruction_fixed_latched > maxval_ext_latched) begin
    reconstructed_next = {PIX_WIDTH{1'b1}};
  end else begin
    reconstructed_next = reconstruction_fixed_latched[PIX_WIDTH-1:0];
  end
end
|
||||
|
||||
// Multi-cycle pipeline control. One corrected-prediction event is captured in
// STATE_IDLE, walked through the quantization and reconstruction stages one
// register boundary per state, and published in STATE_FINISH.
always_ff @(posedge clk) begin
  if (rst) begin
    // Control state and output handshake.
    state     <= STATE_IDLE;
    err_valid <= 1'b0;

    // Captured input event and coding parameters.
    sample_latched           <= '0;
    Px_latched               <= '0;
    x_latched                <= 13'd0;
    y_latched                <= 13'd0;
    strip_first_latched      <= 1'b0;
    strip_last_latched       <= 1'b0;
    context_index_latched    <= 9'd0;
    context_negative_latched <= 1'b0;
    run_mode_latched         <= 1'b0;
    A_latched                <= 32'd0;
    B_latched                <= 32'sd0;
    C_latched                <= 9'sd0;
    N_latched                <= 16'd0;
    RANGE_latched            <= 17'd0;
    qbpp_latched             <= 5'd0;
    LIMIT_latched            <= 7'd0;
    NEAR_latched             <= 6'd0;

    // Quantization divider registers.
    oriented_error_latched      <= 33'sd0;
    quotient_negative_latched   <= 1'b0;
    div_dividend                <= '0;
    div_quotient                <= '0;
    div_denominator             <= 6'd0;
    div_magic                   <= '0;
    div_product                 <= '0;
    recip_quotient_est_latched  <= '0;
    recip_check_product_latched <= '0;
    recip_dividend_ext_latched  <= '0;

    // Modulo reduction and sign restoration registers.
    modulo_Errval_after_add_latched <= 33'sd0;
    modulo_Errval_latched           <= 33'sd0;
    sign_restored_Errval_latched    <= 33'sd0;
    sign_restored_mul_latched       <= 33'sd0;
    near_scale_latched              <= 7'sd1;

    // Reconstruction registers.
    dequantized_error_latched    <= 41'sd0;
    reconstruction_base_latched  <= 41'sd0;
    reconstruction_sum_latched   <= 41'sd0;
    range_scaled_latched         <= 41'sd0;
    reconstruction_fixed_latched <= 41'sd0;
    maxval_ext_latched           <= 41'sd0;
    near_ext_latched             <= 41'sd0;
    maxval_plus_near_latched     <= 41'sd0;
    negative_near_latched        <= 41'sd0;
    reconstructed_calc_latched   <= '0;

    // Published result fields.
    Errval                <= 32'sd0;
    reconstructed_sample  <= '0;
    err_x                 <= 13'd0;
    err_y                 <= 13'd0;
    err_strip_first_pixel <= 1'b0;
    err_strip_last_pixel  <= 1'b0;
    err_context_index     <= 9'd0;
    err_context_negative  <= 1'b0;
    err_run_mode_context  <= 1'b0;
    err_qbpp              <= 5'd0;
    err_LIMIT             <= 7'd0;
    err_A                 <= 32'd0;
    err_B                 <= 32'sd0;
    err_C                 <= 9'sd0;
    err_N                 <= 16'd0;
  end else begin
    // A pending result leaves the output register once it is taken. When
    // STATE_FINISH publishes a new result in the same cycle, its later
    // nonblocking assignment to err_valid wins.
    if (err_valid && err_ready) begin
      err_valid <= 1'b0;
    end

    case (state)
      // Wait for an input event; capture it together with the current coding
      // parameters and clear the divider result registers.
      STATE_IDLE: begin
        if (accept_corrected) begin
          state                    <= STATE_INPUT_PREP;
          sample_latched           <= corrected_sample;
          Px_latched               <= corrected_Px;
          x_latched                <= corrected_x;
          y_latched                <= corrected_y;
          strip_first_latched      <= corrected_strip_first_pixel;
          strip_last_latched       <= corrected_strip_last_pixel;
          context_index_latched    <= corrected_context_index;
          context_negative_latched <= corrected_context_negative;
          run_mode_latched         <= corrected_run_mode_context;
          A_latched                <= corrected_A;
          B_latched                <= corrected_B;
          C_latched                <= corrected_C;
          N_latched                <= corrected_N;
          RANGE_latched            <= RANGE;
          qbpp_latched             <= qbpp;
          LIMIT_latched            <= LIMIT;
          NEAR_latched             <= NEAR;
          div_quotient             <= '0;
          div_product              <= '0;
        end
      end

      // Clause     : ITU-T T.87 / ISO/IEC 14495-1 Annex A.5, Errval quantization
      // Pseudocode : Errval = Ix - Px, sign orientation
      // Stage note : Operates on the fields captured in STATE_IDLE and
      //              registers the oriented error, so the Ix-Px subtract and
      //              sign mux do not reach the reciprocal multiply directly.
      STATE_INPUT_PREP: begin
        state                  <= STATE_NUMERATOR_PREP;
        oriented_error_latched <= oriented_error_next;
      end

      // Clause     : ITU-T T.87 / ISO/IEC 14495-1 Annex A.5, Errval quantization
      // Pseudocode : quotient sign and |Errval| + NEAR numerator
      // Stage note : With NEAR == 0 the numerator already is the quotient,
      //              so the divider states are skipped. Otherwise the
      //              dividend, divisor and reciprocal constant are
      //              registered ahead of the multiply.
      STATE_NUMERATOR_PREP: begin
        quotient_negative_latched <= quotient_negative_next;

        if (NEAR_latched == 6'd0) begin
          div_quotient <= division_numerator_next;
          state        <= STATE_ERRVAL_PREP;
        end else begin
          div_dividend    <= division_numerator_next;
          div_denominator <= divisor_small_next;
          div_magic       <= reciprocal_magic_next;
          state           <= STATE_DIV_MUL;
        end
      end

      // Register the dividend-times-reciprocal product.
      STATE_DIV_MUL: begin
        state       <= STATE_DIV_CHECK;
        div_product <= div_product_next;
      end

      // Stage note : Register the quotient estimate, q*d and the dividend
      //              ahead of the final correction, keeping the multiplier
      //              output out of the subtract-one carry chain.
      STATE_DIV_CHECK: begin
        state                       <= STATE_DIV_CORRECT;
        recip_quotient_est_latched  <= recip_quotient_est;
        recip_check_product_latched <= recip_check_product;
        recip_dividend_ext_latched  <= recip_dividend_ext;
      end

      // Register the corrected quotient.
      STATE_DIV_CORRECT: begin
        state        <= STATE_ERRVAL_PREP;
        div_quotient <= recip_quotient_corrected;
      end

      // Clause     : ITU-T T.87 / ISO/IEC 14495-1 Annex A.5, modulo reduction
      // Pseudocode : first wrap step (negative Errval + RANGE)
      // Stage note : Registered before the midpoint compare so div_quotient
      //              does not drive both carry chains in one cycle.
      STATE_ERRVAL_PREP: begin
        state                           <= STATE_ERRVAL;
        modulo_Errval_after_add_latched <= modulo_Errval_after_add;
      end

      // Clause     : ITU-T T.87 / ISO/IEC 14495-1 Annex A.5, modulo reduction
      // Pseudocode : midpoint wrap step
      // Stage note : Only the midpoint compare/subtract is left in this state.
      STATE_ERRVAL: begin
        state                 <= STATE_ERRVAL_SIGN;
        modulo_Errval_latched <= modulo_Errval;
      end

      // Clause     : ITU-T T.87 / ISO/IEC 14495-1 Annex A.5, modulo reduction
      // Pseudocode : restore the Errval sign after modulo normalization
      // Stage note : Separates the divider/modulo carry chain from the
      //              context sign mux and the reconstruction multiplier.
      STATE_ERRVAL_SIGN: begin
        state                        <= STATE_RECON_FACTORS;
        sign_restored_Errval_latched <= sign_restored_Errval;
      end

      // Clause     : ITU-T T.87 / ISO/IEC 14495-1 Annex A.5, reconstructed Rx
      // Pseudocode : prepare Errval and (2*NEAR+1) for dequantization
      // Stage note : Dedicated operand registers in front of the odd-scale
      //              multiplier.
      STATE_RECON_FACTORS: begin
        state                     <= STATE_RECON_MUL;
        sign_restored_mul_latched <= sign_restored_Errval_latched;
        near_scale_latched        <= $signed({NEAR_latched, 1'b1});
      end

      // Clause     : ITU-T T.87 / ISO/IEC 14495-1 Annex A.5, reconstructed Rx
      // Pseudocode : Errval * (2*NEAR+1) and RANGE * (2*NEAR+1)
      // Stage note : Products and clamp constants are registered ahead of the
      //              wrap/clamp to shorten the NEAR-to-Rx path at 250 MHz.
      STATE_RECON_MUL: begin
        state                       <= STATE_RECON_SUM;
        dequantized_error_latched   <= dequantized_error;
        range_scaled_latched        <= range_scaled;
        reconstruction_base_latched <= reconstruction_base;
        maxval_ext_latched          <= maxval_ext;
        near_ext_latched            <= near_ext;
      end

      // Stage note : Register the reconstruction sum together with the wrap
      //              thresholds MAXVAL + NEAR and -NEAR, so the threshold
      //              arithmetic is separate from the wrap add/subtract.
      STATE_RECON_SUM: begin
        state                      <= STATE_RECON_CALC;
        reconstruction_sum_latched <= reconstruction_sum;
        maxval_plus_near_latched   <= maxval_ext_latched + near_ext_latched;
        negative_near_latched      <= -near_ext_latched;
      end

      // Clause     : ITU-T T.87 / ISO/IEC 14495-1 Annex A.5, reconstructed Rx
      // Pseudocode : wrap Px + Errval * (2*NEAR+1) into the extended range
      // Stage note : The wrapped value is registered before the final clamp.
      STATE_RECON_CALC: begin
        state                        <= STATE_RECON_CLAMP;
        reconstruction_fixed_latched <= reconstruction_fixed;
      end

      // Clause     : ITU-T T.87 / ISO/IEC 14495-1 Annex A.5, reconstructed Rx
      // Pseudocode : Rx = clamp(wrapped reconstruction, 0, MAXVAL)
      // Stage note : The clamp has its own stage, apart from the wrap logic.
      STATE_RECON_CLAMP: begin
        state                      <= STATE_FINISH;
        reconstructed_calc_latched <= reconstructed_next;
      end

      // Publish the result once the output register is free; otherwise stay
      // here and hold the computed values.
      STATE_FINISH: begin
        if (output_slot_open) begin
          state                 <= STATE_IDLE;
          err_valid             <= 1'b1;
          Errval                <= modulo_Errval_latched[31:0];
          reconstructed_sample  <= reconstructed_calc_latched;
          err_x                 <= x_latched;
          err_y                 <= y_latched;
          err_strip_first_pixel <= strip_first_latched;
          err_strip_last_pixel  <= strip_last_latched;
          err_context_index     <= context_index_latched;
          err_context_negative  <= context_negative_latched;
          err_run_mode_context  <= run_mode_latched;
          err_qbpp              <= qbpp_latched;
          err_LIMIT             <= LIMIT_latched;
          err_A                 <= A_latched;
          err_B                 <= B_latched;
          err_C                 <= C_latched;
          err_N                 <= N_latched;
        end
      end

      // Any unexpected encoding returns to idle.
      default: begin
        state <= STATE_IDLE;
      end
    endcase
  end
end
|
||||
|
||||
endmodule
|
||||
|
||||
`default_nettype wire
|
||||
1476
fpga/verilog/jls_run_mode.sv
Normal file
1476
fpga/verilog/jls_run_mode.sv
Normal file
File diff suppressed because it is too large
Load Diff
364
fpga/verilog/jls_scan_ctrl.sv
Normal file
364
fpga/verilog/jls_scan_ctrl.sv
Normal file
@@ -0,0 +1,364 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.8 control procedure, Annex D.1-D.3 scan control
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Start one JPEG-LS scan per standalone strip frame
|
||||
// Trace : docs/jls_traceability.md#jls-scan-control
|
||||
// Example : The first pixel of each strip emits strip_start_valid.
|
||||
//
|
||||
// Scan controller for the strip-frame architecture. It converts pixel boundary
|
||||
// flags from jls_input_ctrl into strip start/finish commands and forwards the
|
||||
// pixel stream to the later predictor/context pipeline. A one-entry registered
|
||||
// slot breaks the input pixel_valid path away from downstream strip-start and
|
||||
// context ready/CE controls while still allowing one accepted pixel per cycle
|
||||
// when the slot drains and refills in the same cycle.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_scan_ctrl #(
  // Grayscale sample precision, fixed at compile time.
  // Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16,

  // Original-image rows carried by one standalone JPEG-LS strip frame.
  parameter int SCAN_ROWS = 16
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Active-high synchronous reset.
  input var logic rst,

  // ---------------------------------------------------------------------
  // Pixel events from jls_input_ctrl
  // ---------------------------------------------------------------------

  // Input pixel event is valid.
  input var logic pixel_valid,

  // This controller can take the input pixel event in the current cycle.
  output logic pixel_ready,

  // Grayscale sample value.
  input var logic [PIX_WIDTH-1:0] pixel_sample,

  // Zero-based column of the sample in the original image.
  input var logic [12:0] pixel_x,

  // Zero-based row of the sample in the original image.
  input var logic [12:0] pixel_y,

  // Sample is the first pixel of the current strip frame.
  input var logic strip_first_pixel,

  // Sample is the last pixel of the current strip frame.
  input var logic strip_last_pixel,

  // Sample is the first pixel of the current original image.
  input var logic image_first_pixel,

  // Sample is the last pixel of the current original image.
  input var logic image_last_pixel,

  // Effective image width after runtime validation and fallback.
  input var logic [12:0] active_pic_col,

  // Ratio latched for the current original image.
  input var logic [3:0] active_ratio,

  // Dynamic NEAR from jls_near_ctrl, used for every strip except the first.
  input var logic [5:0] current_near,

  // ---------------------------------------------------------------------
  // Pixel events towards the predictor/context pipeline
  // ---------------------------------------------------------------------

  // Forwarded pixel event is valid.
  output logic enc_pixel_valid,

  // Predictor/context pipeline can take the forwarded pixel.
  input var logic enc_pixel_ready,

  // Forwarded grayscale sample.
  output logic [PIX_WIDTH-1:0] enc_pixel_sample,

  // Forwarded original-image column.
  output logic [12:0] enc_pixel_x,

  // Forwarded original-image row.
  output logic [12:0] enc_pixel_y,

  // Forwarded end-of-row flag, registered together with enc_pixel_x/y. Not to
  // be confused with enc_strip_last_pixel, which marks the end of the strip.
  output logic enc_row_last_pixel,

  // Forwarded strip-first flag for strip-local boundary handling.
  output logic enc_strip_first_pixel,

  // Forwarded strip-last flag for strip-local flush handling.
  output logic enc_strip_last_pixel,

  // ---------------------------------------------------------------------
  // Strip start command for jls_header_writer
  // ---------------------------------------------------------------------

  // Strip start command is valid.
  output logic strip_start_valid,

  // Header writer can take a strip start command.
  input var logic strip_start_ready,

  // Strip is the first one of an original input image.
  output logic original_image_first_strip,

  // Strip frame width written to SOF55.X.
  output logic [12:0] strip_width,

  // Strip frame height written to SOF55.Y.
  output logic [12:0] strip_height,

  // NEAR value applied to this strip frame.
  output logic [5:0] strip_near,

  // ---------------------------------------------------------------------
  // Strip finish command
  // ---------------------------------------------------------------------

  // Strip finish command, raised as the last strip pixel enters the encode
  // pipeline.
  output logic strip_finish_valid,

  // Downstream finish handler can take the strip finish command.
  input var logic strip_finish_ready,

  // Strip is the last one of an original input image.
  output logic original_image_last_strip,

  // Pixel count of the completed strip frame.
  output logic [31:0] strip_pixel_count,

  // ---------------------------------------------------------------------
  // jls_near_ctrl notification
  // ---------------------------------------------------------------------

  // Pulse marking the start of an original image.
  output logic near_image_start_valid,

  // Ratio handed to jls_near_ctrl at original-image start.
  output logic [3:0] near_image_ratio
);

  // SCAN_ROWS sized to the SOF55.Y field.
  localparam logic [12:0] SCAN_ROWS_VALUE = SCAN_ROWS;

  // Pixels forwarded so far in the current strip, and that count plus one.
  logic [31:0] strip_pixel_count_running;
  logic [31:0] strip_pixel_count_next;

  // Single registered slot between the FIFO-facing input controller and the
  // downstream strip/encode pipeline.
  logic                 slot_valid;
  logic [PIX_WIDTH-1:0] slot_sample;
  logic [12:0]          slot_x;
  logic [12:0]          slot_y;
  logic                 slot_strip_first_pixel;
  logic                 slot_strip_last_pixel;
  logic                 slot_row_last_pixel;
  logic                 slot_image_first_pixel;
  logic                 slot_image_last_pixel;
  logic [12:0]          slot_active_pic_col;
  logic [5:0]           slot_strip_near;

  // Readiness terms: the input_* group looks at the flags of the incoming
  // pixel, the other group at the flags of the pixel held in the slot.
  logic        input_start_path_ready;
  logic        input_finish_path_ready;
  logic        input_boundary_ready;
  logic        start_path_ready;
  logic        finish_path_ready;
  logic        all_paths_ready;
  logic        slot_open_for_input;
  logic        accepted_input;
  logic        forward_slot;
  logic [12:0] input_row_last_col;
  logic        input_row_last_pixel;

  // NEAR chosen for the incoming pixel. It is forced to zero on the first
  // pixel of an image, because jls_near_ctrl may not have reset yet in that
  // same cycle.
  logic [5:0] selected_strip_near;

  // Incoming-pixel gating: a strip-first pixel needs strip_start_ready and a
  // strip-last pixel needs strip_finish_ready before it may be loaded.
  always_comb begin
    if (strip_first_pixel && !strip_start_ready) begin
      input_start_path_ready = 1'b0;
    end else begin
      input_start_path_ready = 1'b1;
    end

    if (strip_last_pixel && !strip_finish_ready) begin
      input_finish_path_ready = 1'b0;
    end else begin
      input_finish_path_ready = 1'b1;
    end

    if (input_start_path_ready && input_finish_path_ready) begin
      input_boundary_ready = 1'b1;
    end else begin
      input_boundary_ready = 1'b0;
    end
  end

  // Slot-side gating: the held pixel leaves only when its strip command
  // targets (if any) and the encode pipeline are all ready in the same cycle.
  always_comb begin
    if (slot_strip_first_pixel && !strip_start_ready) begin
      start_path_ready = 1'b0;
    end else begin
      start_path_ready = 1'b1;
    end

    if (slot_strip_last_pixel && !strip_finish_ready) begin
      finish_path_ready = 1'b0;
    end else begin
      finish_path_ready = 1'b1;
    end

    if (start_path_ready && finish_path_ready && enc_pixel_ready) begin
      all_paths_ready = 1'b1;
    end else begin
      all_paths_ready = 1'b0;
    end

    if (slot_valid && all_paths_ready) begin
      forward_slot = 1'b1;
    end else begin
      forward_slot = 1'b0;
    end

    // The slot can load when it is empty or draining this cycle.
    if (!slot_valid || forward_slot) begin
      slot_open_for_input = 1'b1;
    end else begin
      slot_open_for_input = 1'b0;
    end
  end

  // Input handshake.
  always_comb begin
    if (input_boundary_ready && slot_open_for_input) begin
      pixel_ready = 1'b1;
    end else begin
      pixel_ready = 1'b0;
    end

    if (pixel_valid && pixel_ready) begin
      accepted_input = 1'b1;
    end else begin
      accepted_input = 1'b0;
    end
  end

  // Encode-pipeline interface: valid is held back until the strip command
  // paths needed by the held pixel are ready; payload comes from the slot.
  always_comb begin
    if (slot_valid && start_path_ready && finish_path_ready) begin
      enc_pixel_valid = 1'b1;
    end else begin
      enc_pixel_valid = 1'b0;
    end

    enc_pixel_sample      = slot_sample;
    enc_pixel_x           = slot_x;
    enc_pixel_y           = slot_y;
    enc_row_last_pixel    = slot_row_last_pixel;
    enc_strip_first_pixel = slot_strip_first_pixel;
    enc_strip_last_pixel  = slot_strip_last_pixel;
  end

  // Strip start/finish commands: each is raised only when the other
  // participants of the same transfer are ready.
  always_comb begin
    if (slot_valid && slot_strip_first_pixel && finish_path_ready && enc_pixel_ready) begin
      strip_start_valid = 1'b1;
    end else begin
      strip_start_valid = 1'b0;
    end

    if (slot_valid && slot_strip_last_pixel && start_path_ready && enc_pixel_ready) begin
      strip_finish_valid = 1'b1;
    end else begin
      strip_finish_valid = 1'b0;
    end

    original_image_first_strip = slot_image_first_pixel;
    original_image_last_strip  = slot_image_last_pixel;
    strip_width                = slot_active_pic_col;
    strip_height               = SCAN_ROWS_VALUE;
    strip_near                 = slot_strip_near;
  end

  // Values derived from the incoming pixel before it is loaded.
  always_comb begin
    if (image_first_pixel) begin
      selected_strip_near = 6'd0;
    end else begin
      selected_strip_near = current_near;
    end

    input_row_last_col = active_pic_col - 13'd1;

    if (pixel_x == input_row_last_col) begin
      input_row_last_pixel = 1'b1;
    end else begin
      input_row_last_pixel = 1'b0;
    end
  end

  // Strip pixel count: reported only while the slot holds the strip-last
  // pixel, as the running count plus that pixel; zero otherwise.
  always_comb begin
    strip_pixel_count_next = strip_pixel_count_running + 32'd1;

    if (slot_strip_last_pixel) begin
      strip_pixel_count = strip_pixel_count_next;
    end else begin
      strip_pixel_count = 32'd0;
    end
  end

  // jls_near_ctrl notification, issued when the image-first pixel is accepted
  // at the input (not when it is forwarded).
  always_comb begin
    if (accepted_input && image_first_pixel) begin
      near_image_start_valid = 1'b1;
    end else begin
      near_image_start_valid = 1'b0;
    end

    near_image_ratio = active_ratio;
  end

  // Running pixel counter, updated as pixels are forwarded: cleared after the
  // strip-last pixel, restarted at one on a strip-first pixel, incremented
  // otherwise.
  always_ff @(posedge clk) begin
    if (rst) begin
      strip_pixel_count_running <= 32'd0;
    end else if (forward_slot) begin
      if (slot_strip_last_pixel) begin
        strip_pixel_count_running <= 32'd0;
      end else if (slot_strip_first_pixel) begin
        strip_pixel_count_running <= 32'd1;
      end else begin
        strip_pixel_count_running <= strip_pixel_count_next;
      end
    end
  end

  // Slot register: loads on an accepted input (including a same-cycle drain
  // and refill) and empties when forwarded without a new input.
  always_ff @(posedge clk) begin
    if (rst) begin
      slot_valid             <= 1'b0;
      slot_sample            <= '0;
      slot_x                 <= 13'd0;
      slot_y                 <= 13'd0;
      slot_strip_first_pixel <= 1'b0;
      slot_strip_last_pixel  <= 1'b0;
      slot_row_last_pixel    <= 1'b0;
      slot_image_first_pixel <= 1'b0;
      slot_image_last_pixel  <= 1'b0;
      slot_active_pic_col    <= 13'd0;
      slot_strip_near        <= 6'd0;
    end else if (accepted_input) begin
      slot_valid             <= 1'b1;
      slot_sample            <= pixel_sample;
      slot_x                 <= pixel_x;
      slot_y                 <= pixel_y;
      slot_strip_first_pixel <= strip_first_pixel;
      slot_strip_last_pixel  <= strip_last_pixel;
      slot_row_last_pixel    <= input_row_last_pixel;
      slot_image_first_pixel <= image_first_pixel;
      slot_image_last_pixel  <= image_last_pixel;
      slot_active_pic_col    <= active_pic_col;
      slot_strip_near        <= selected_strip_near;
    end else if (forward_slot) begin
      slot_valid <= 1'b0;
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
1682
fpga/verilog/jpeg_ls_encoder_top.sv
Normal file
1682
fpga/verilog/jpeg_ls_encoder_top.sv
Normal file
File diff suppressed because it is too large
Load Diff
24
fpga/verilog/jpeg_ls_rtl.f
Normal file
24
fpga/verilog/jpeg_ls_rtl.f
Normal file
@@ -0,0 +1,24 @@
|
||||
fpga/verilog/jls_common_pkg.sv
|
||||
fpga/verilog/jls_preset_defaults.sv
|
||||
fpga/verilog/jls_coding_params.sv
|
||||
fpga/verilog/jls_input_ctrl.sv
|
||||
fpga/verilog/jls_scan_ctrl.sv
|
||||
fpga/verilog/jls_neighbor_provider.sv
|
||||
fpga/verilog/jls_mode_router.sv
|
||||
fpga/verilog/jls_predictor.sv
|
||||
fpga/verilog/jls_context_quantizer.sv
|
||||
fpga/verilog/jls_context_model.sv
|
||||
fpga/verilog/jls_prediction_corrector.sv
|
||||
fpga/verilog/jls_near_scale_mul.sv
|
||||
fpga/verilog/jls_regular_error_quantizer.sv
|
||||
fpga/verilog/jls_header_writer.sv
|
||||
fpga/verilog/jls_near_ctrl.sv
|
||||
fpga/verilog/jls_context_memory.sv
|
||||
fpga/verilog/jls_context_update.sv
|
||||
fpga/verilog/jls_error_mapper.sv
|
||||
fpga/verilog/jls_run_mode.sv
|
||||
fpga/verilog/jls_golomb_encoder.sv
|
||||
fpga/verilog/jls_bit_packer.sv
|
||||
fpga/verilog/jls_byte_arbiter.sv
|
||||
fpga/verilog/jls_output_buffer.sv
|
||||
fpga/verilog/jpeg_ls_encoder_top.sv
|
||||
Reference in New Issue
Block a user