Initial JPEG-LS FPGA encoder baseline with tooling and timeout fix

This commit is contained in:
2026-04-16 18:55:08 +08:00
commit e4fdbdfeec
150 changed files with 25796 additions and 0 deletions

View File

@@ -0,0 +1,289 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex C.1-C.4 entropy-coded segment syntax
// Figure : N/A
// Table : N/A
// Pseudocode : JPEG-LS bitstream packing and marker/zero-bit stuffing
// Trace : docs/jls_traceability.md#bit-packing-and-stuffing
// Example : Data bits 0xFF followed by seven 1 bits produce bytes FF 7F.
//
// Bit packer for JPEG-LS scan payload bytes. The input code word is left
// aligned: the first bit to write is code_bits[MAX_CODE_BITS-1]. After a data
// byte equal to 0xFF is emitted, the packer inserts one stuffed zero bit before
// the next data bit as required by JPEG-LS marker/zero-bit stuffing.
`default_nettype none
module jls_bit_packer #(
  // Widest code word, in bits, that a single code event may carry. Longer
  // Golomb codes have to be delivered as several ordered events.
  parameter int MAX_CODE_BITS = 64
) (
  // Main 250 MHz clock.
  input var logic clk,
  // Synchronous active-high reset.
  input var logic rst,
  // A variable-length code event is being offered.
  input var logic code_valid,
  // High when the packer is idle and can take a code event this cycle.
  output logic code_ready,
  // Left-aligned code word; bit MAX_CODE_BITS-1 is emitted first.
  input var logic [MAX_CODE_BITS-1:0] code_bits,
  // How many leading bits of code_bits are meaningful.
  input var logic [6:0] code_bit_count,
  // Request to zero-pad and emit the current partial byte (before EOI).
  input var logic flush_valid,
  // High when a flush request can be taken (idle and no code event offered).
  output logic flush_ready,
  // One-cycle pulse once the flush has fully drained.
  output logic flush_done,
  // A packed payload byte is available on byte_data.
  output logic byte_valid,
  // Downstream accepts byte_data this cycle.
  input var logic byte_ready,
  // Packed scan payload byte.
  output logic [7:0] byte_data
);
  // ---- Registered state ---------------------------------------------------
  // Code word being drained (left aligned) and the number of bits still in it.
  logic [MAX_CODE_BITS-1:0] code_shift_reg;
  logic [6:0] bits_remaining;
  // Output byte under construction, filled MSB first, and its fill level.
  logic [7:0] partial_byte;
  logic [3:0] partial_count;
  // Set after a 0xFF byte was emitted: the next byte must start with a 0 bit.
  logic stuff_zero_pending;
  // Set from flush acceptance until padding (and any stuffed bit) is drained.
  logic flush_active;

  // ---- Handshake terms ----------------------------------------------------
  logic out_slot_free;
  logic packer_idle;
  logic take_code;
  logic take_flush;
  logic step_enable;

  // ---- Next-state values for one pack step --------------------------------
  logic [MAX_CODE_BITS-1:0] next_shift_reg;
  logic [6:0] next_bits_remaining;
  logic [7:0] next_partial_byte;
  logic [3:0] next_partial_count;
  logic next_stuff_zero_pending;
  logic next_flush_active;
  logic emit_strobe;
  logic [7:0] emit_value;
  logic flush_finished;
  logic [3:0] free_slots;
  logic [3:0] take_count;
  logic [15:0] mask_window;
  logic [7:0] keep_mask;
  logic [7:0] aligned_bits;

  // The output register can be (re)loaded when it is empty or being drained.
  assign out_slot_free = byte_ready || !byte_valid;

  // New work is only accepted when nothing is queued and no flush is running.
  assign packer_idle = out_slot_free && !flush_active && (bits_remaining == 7'd0);

  assign code_ready = packer_idle;
  // Code events win over flush requests when both are offered.
  assign flush_ready = packer_idle && !code_valid;

  // Zero-length code events complete the handshake but load nothing.
  assign take_code  = code_valid && code_ready && (code_bit_count != 7'd0);
  assign take_flush = flush_valid && flush_ready;

  // A pack step runs whenever there is something to do and room to emit.
  assign step_enable = out_slot_free &&
                       ((bits_remaining != 7'd0) || flush_active || stuff_zero_pending);

  always_comb begin
    // Default: hold every piece of state and emit nothing.
    next_shift_reg          = code_shift_reg;
    next_bits_remaining     = bits_remaining;
    next_partial_byte       = partial_byte;
    next_partial_count      = partial_count;
    next_stuff_zero_pending = stuff_zero_pending;
    next_flush_active       = flush_active;
    emit_strobe             = 1'b0;
    emit_value              = 8'h00;
    flush_finished          = 1'b0;

    // Number of code bits moved this step: min(bits left, room in the byte).
    free_slots = 4'd8 - partial_count;
    if (bits_remaining == 7'd0) begin
      take_count = 4'd0;
    end else if (bits_remaining >= {3'd0, free_slots}) begin
      take_count = free_slots;
    end else begin
      take_count = bits_remaining[3:0];
    end

    // Sliding 0xFF00 right by take_count leaves the top take_count bits set in
    // the low byte (0 -> 00, 1 -> 80, ... 8 -> FF). Counts above 8 select none.
    mask_window = 16'hFF00 >> take_count;
    keep_mask   = (take_count > 4'd8) ? 8'h00 : mask_window[7:0];

    // Selected leading code bits, moved down to the first free bit position.
    aligned_bits = (code_shift_reg[MAX_CODE_BITS-1 -: 8] & keep_mask) >> partial_count[2:0];

    if (step_enable) begin
      if (stuff_zero_pending) begin
        // Insert the stuffed zero: the bit is already 0, so only the count moves.
        next_partial_count      = partial_count + 4'd1;
        next_stuff_zero_pending = 1'b0;
        if (next_partial_count == 4'd8) begin
          emit_strobe        = 1'b1;
          emit_value         = partial_byte;
          next_partial_byte  = 8'h00;
          next_partial_count = 4'd0;
        end
      end else if (bits_remaining != 7'd0) begin
        // Merge code bits into the byte and consume them from the shifter.
        next_partial_byte   = partial_byte | aligned_bits;
        next_partial_count  = partial_count + take_count;
        next_shift_reg      = code_shift_reg << take_count;
        next_bits_remaining = bits_remaining - {3'd0, take_count};
        if (next_partial_count == 4'd8) begin
          emit_strobe = 1'b1;
          emit_value  = next_partial_byte;
          if (next_partial_byte == 8'hFF) begin
            next_stuff_zero_pending = 1'b1;
          end
          next_partial_byte  = 8'h00;
          next_partial_count = 4'd0;
        end
      end else if (flush_active) begin
        if (partial_count != 4'd0) begin
          // Emit the partial byte; the unfilled low bits are zero padding.
          emit_strobe = 1'b1;
          emit_value  = partial_byte;
          if (partial_byte == 8'hFF) begin
            next_stuff_zero_pending = 1'b1;
          end
          next_partial_byte  = 8'h00;
          next_partial_count = 4'd0;
        end else begin
          // Nothing left to pad or stuff: the flush is complete.
          next_flush_active = 1'b0;
          flush_finished    = 1'b1;
        end
      end
    end
  end

  always_ff @(posedge clk) begin
    if (rst) begin
      code_shift_reg     <= {MAX_CODE_BITS{1'b0}};
      bits_remaining     <= 7'd0;
      partial_byte       <= 8'h00;
      partial_count      <= 4'd0;
      stuff_zero_pending <= 1'b0;
      flush_active       <= 1'b0;
      flush_done         <= 1'b0;
      byte_valid         <= 1'b0;
      byte_data          <= 8'h00;
    end else begin
      // flush_finished is only raised inside an enabled step, so this is a
      // single-cycle pulse.
      flush_done <= flush_finished;

      // Next-state values equal the current state when no step runs, so they
      // can be registered unconditionally.
      partial_byte       <= next_partial_byte;
      partial_count      <= next_partial_count;
      stuff_zero_pending <= next_stuff_zero_pending;

      // A newly accepted code word or flush request overrides the step result.
      code_shift_reg <= take_code ? code_bits : next_shift_reg;
      bits_remaining <= take_code ? code_bit_count : next_bits_remaining;
      flush_active   <= take_flush ? 1'b1 : next_flush_active;

      if (out_slot_free) begin
        byte_valid <= emit_strobe;
        byte_data  <= emit_value;
      end
    end
  end
endmodule
`default_nettype wire

View File

@@ -0,0 +1,104 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex C.1-C.4 marker stream byte order
// Figure : N/A
// Table : N/A
// Pseudocode : Marker bytes and entropy-coded bytes in stream order
// Trace : docs/jls_traceability.md#jls-header-markers
// Example : Header byte FF is forwarded before a waiting payload byte.
//
// Two-input byte-stream arbiter. Header/EOI bytes have priority over payload
// bytes so a strip frame is emitted as SOI/SOF/LSE/SOS, payload, and then EOI.
`default_nettype none
module jls_byte_arbiter (
  // Header/EOI byte source is offering a byte.
  input var logic header_valid,
  // High when the header byte is the one being accepted downstream.
  output logic header_ready,
  // Header byte plus its original-image-start sideband flag.
  input var logic [7:0] header_data,
  input var logic header_original_image_start,
  // Scan payload byte source is offering a byte.
  input var logic payload_valid,
  // High when the payload byte is the one being accepted downstream.
  output logic payload_ready,
  // Payload byte; payload never asserts the original-image-start sideband.
  input var logic [7:0] payload_data,
  // Arbitrated byte is valid.
  output logic byte_valid,
  // Downstream accepts the arbitrated byte this cycle.
  input var logic byte_ready,
  // Arbitrated byte and sideband.
  output logic [7:0] byte_data,
  output logic original_image_start
);
  // Fixed-priority grants: header always wins, payload only when header idles.
  logic grant_header;
  logic grant_payload;

  assign grant_header  = header_valid;
  assign grant_payload = payload_valid && !header_valid;

  // A byte is presented downstream whenever either source holds the grant.
  assign byte_valid = grant_header || grant_payload;

  // Only the granted source sees the downstream ready.
  assign header_ready  = grant_header && byte_ready;
  assign payload_ready = grant_payload && byte_ready;

  // Data/sideband mux; idle outputs are driven to zero.
  always_comb begin
    if (grant_header) begin
      byte_data            = header_data;
      original_image_start = header_original_image_start;
    end else if (grant_payload) begin
      byte_data            = payload_data;
      original_image_start = 1'b0;
    end else begin
      byte_data            = 8'h00;
      original_image_start = 1'b0;
    end
  end
endmodule
`default_nettype wire

View File

@@ -0,0 +1,251 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.2 initialization, Annex G.2 variables
// Figure : N/A
// Table : N/A
// Pseudocode : RANGE, qbpp, and LIMIT derivation from MAXVAL and NEAR
// Trace : docs/jls_traceability.md#jls-coding-parameters
// Example : PIX_WIDTH=8,NEAR=0 gives RANGE=256,qbpp=8,LIMIT=32.
//
// JPEG-LS coding parameter lookup. RANGE and qbpp depend on NEAR, but NEAR is
// limited to 0..31 in this project. A lookup table avoids a synthesized
// runtime divider and keeps this strip-level control path timing friendly.
`default_nettype none
module jls_coding_params #(
  // Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
  // Any other value is treated as 16.
  parameter int PIX_WIDTH = 16
) (
  // JPEG-LS NEAR parameter for the current strip frame.
  input var logic [5:0] NEAR,
  // JPEG-LS RANGE parameter.
  output logic [16:0] RANGE,
  // JPEG-LS quantized bits per sample, ceil(log2(RANGE)).
  output logic [4:0] qbpp,
  // JPEG-LS LIMIT parameter used by regular-mode Golomb coding.
  output logic [6:0] LIMIT
);
  // Largest NEAR value with its own table entry; larger inputs saturate here.
  localparam int NEAR_MAX     = 31;
  localparam int NEAR_ENTRIES = NEAR_MAX + 1;

  // Sample precision actually used for the tables. Unsupported PIX_WIDTH
  // values select the 16-bit tables.
  localparam int BPP = ((PIX_WIDTH == 8) || (PIX_WIDTH == 10) ||
                        (PIX_WIDTH == 12) || (PIX_WIDTH == 14)) ? PIX_WIDTH : 16;
  // Largest sample value at this precision.
  localparam int MAXVAL = (1 << BPP) - 1;
  // LIMIT = 2 * (bpp + max(8, bpp)); with bpp >= 8 this is 4 * bpp.
  localparam int LIMIT_VALUE = 4 * BPP;

  // Per-NEAR constants, computed entirely at elaboration time so that no
  // runtime divider is built; at run time this is only a 32-entry lookup.
  logic [16:0] range_table [NEAR_ENTRIES];
  logic [4:0]  qbpp_table  [NEAR_ENTRIES];

  for (genvar g = 0; g < NEAR_ENTRIES; g++) begin : g_near_entry
    // RANGE = floor((MAXVAL + 2*NEAR) / (2*NEAR + 1)) + 1
    localparam int ENTRY_RANGE = ((MAXVAL + 2 * g) / (2 * g + 1)) + 1;
    // qbpp = ceil(log2(RANGE))
    localparam int ENTRY_QBPP  = $clog2(ENTRY_RANGE);
    assign range_table[g] = 17'(ENTRY_RANGE);
    assign qbpp_table[g]  = 5'(ENTRY_QBPP);
  end

  // Saturate NEAR to the last table entry.
  logic [4:0] near_index;
  assign near_index = (NEAR > 6'd31) ? 5'd31 : NEAR[4:0];

  assign RANGE = range_table[near_index];
  assign qbpp  = qbpp_table[near_index];
  assign LIMIT = 7'(LIMIT_VALUE);
endmodule
`default_nettype wire

View File

@@ -0,0 +1,51 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.1, Annex C.1-C.4, Annex D.3
// Figure : N/A
// Table : N/A
// Pseudocode : Shared engineering constants for JPEG-LS strip-frame encoder
// Example : See docs/jls_module_interfaces.md
//
// Shared package for the JPEG-LS RTL encoder. Keep this file limited to
// simple constants and small type declarations; algorithmic logic belongs in
// pipelined modules, not in package functions.
package jls_common_pkg;

  // ---- Marker bytes -------------------------------------------------------
  // Every marker is the prefix byte followed by one of the marker codes below.
  localparam logic [7:0] JLS_MARKER_PREFIX = 8'hFF;
  localparam logic [7:0] JLS_MARKER_SOI    = 8'hD8;  // start of image
  localparam logic [7:0] JLS_MARKER_EOI    = 8'hD9;  // end of image
  localparam logic [7:0] JLS_MARKER_SOS    = 8'hDA;  // start of scan
  localparam logic [7:0] JLS_MARKER_SOF55  = 8'hF7;  // JPEG-LS start of frame
  localparam logic [7:0] JLS_MARKER_LSE    = 8'hF8;  // JPEG-LS preset parameters

  // ---- Field widths -------------------------------------------------------
  // cfg_pic_col / cfg_pic_row and image coordinate width.
  localparam int JLS_DIM_WIDTH   = 13;
  // Runtime compression-ratio port width.
  localparam int JLS_RATIO_WIDTH = 4;
  // Output FIFO word: one data byte plus the original-image-start flag.
  localparam int JLS_OFIFO_WIDTH = 9;
  // NEAR field width; this IP clamps NEAR to 0..31.
  localparam int JLS_NEAR_WIDTH  = 6;

  // ---- Enumerations -------------------------------------------------------
  // Encodings accepted on the runtime compression-ratio port.
  typedef enum logic [JLS_RATIO_WIDTH-1:0] {
    JLS_RATIO_LOSSLESS = 4'd0,
    JLS_RATIO_1_TO_2   = 4'd1,
    JLS_RATIO_1_TO_4   = 4'd2,
    JLS_RATIO_1_TO_8   = 4'd3
  } jls_ratio_e;

  // Strip-frame level control events.
  typedef enum logic [1:0] {
    JLS_STRIP_EVENT_NONE   = 2'd0,
    JLS_STRIP_EVENT_START  = 2'd1,
    JLS_STRIP_EVENT_FINISH = 2'd2
  } jls_strip_event_e;

endpackage

View File

@@ -0,0 +1,215 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.2 initialization, Annex A.6 variables update
// Figure : N/A
// Table : N/A
// Pseudocode : Regular-mode context arrays A[0..364], B[0..364], C[0..364], N[0..364]
// Trace : docs/jls_traceability.md#context-update
// Example : RANGE=256 initializes A to max(2,(RANGE+32)/64)=4.
//
// Regular context storage. This module uses lazy strip initialization: strip
// start clears a written-bit vector and latches the Annex A.2 default A value.
// A later read of an unwritten context returns the default A/B/C/N tuple, while
// a written context returns the RAM value. This is equivalent to writing all
// 365 contexts at strip start, but avoids a long boundary stall.
`default_nettype none
module jls_context_memory #(
  // Number of regular-mode contexts, indexed by abs((Q1*9+Q2)*9+Q3).
  parameter int CONTEXT_COUNT = 365
) (
  // Main 250 MHz clock.
  input var logic clk,
  // Synchronous active-high reset.
  input var logic rst,
  // Request lazy re-initialization for a new standalone strip frame.
  input var logic init_valid,
  // High when the initialization request can be taken this cycle.
  output logic init_ready,
  // JPEG-LS RANGE used to derive the default A[Q] value.
  input var logic [16:0] init_RANGE,
  // Would be high during a multi-cycle initializer; tied low here because
  // the lazy reset is applied in the accept cycle.
  output logic init_busy,
  // One-cycle pulse in the cycle after an initialization was accepted.
  output logic init_done,
  // Read request.
  input var logic read_valid,
  // Read request can be accepted.
  output logic read_ready,
  // Context index to read.
  input var logic [8:0] read_context_index,
  // Read result registers hold a valid result.
  output logic read_result_valid,
  // Downstream consumes the read result this cycle.
  input var logic read_result_ready,
  // Read result: context index and its A/B/C/N variables.
  output logic [8:0] read_result_context_index,
  output logic [31:0] read_A,
  output logic signed [31:0] read_B,
  output logic signed [8:0] read_C,
  output logic [15:0] read_N,
  // Writeback request carrying updated context variables.
  input var logic write_valid,
  // Writeback can be accepted.
  output logic write_ready,
  // Context index and updated variables to store.
  input var logic [8:0] write_context_index,
  input var logic [31:0] write_A,
  input var logic signed [31:0] write_B,
  input var logic signed [8:0] write_C,
  input var logic [15:0] write_N
);
  // Per-context variable storage.
  logic [31:0] A_mem [0:CONTEXT_COUNT-1];
  logic signed [31:0] B_mem [0:CONTEXT_COUNT-1];
  logic signed [8:0] C_mem [0:CONTEXT_COUNT-1];
  logic [15:0] N_mem [0:CONTEXT_COUNT-1];

  // Lazy-init bookkeeping: a clear bit means the context has not been written
  // since the last initialization and still reads as the default tuple.
  logic [CONTEXT_COUNT-1:0] context_written;
  // Default A value captured when the initialization was accepted.
  logic [31:0] init_A_latched;

  // Default A derivation: max(2, (RANGE + 32) >> 6).
  logic [31:0] default_A_scaled;
  logic [31:0] default_A;
  assign default_A_scaled = ({15'd0, init_RANGE} + 32'd32) >> 6;
  assign default_A        = (default_A_scaled < 32'd2) ? 32'd2 : default_A_scaled;

  // Handshake terms.
  logic init_fire;
  logic result_slot_free;
  logic read_fire;
  logic write_fire;

  // Initialization waits until no read result is pending and no write is
  // being offered.
  assign init_ready = !read_result_valid && !write_valid;
  assign init_fire  = init_valid && init_ready;

  // The result registers can be reloaded when empty or being consumed.
  assign result_slot_free = read_result_ready || !read_result_valid;

  // Reads and writes are both held off in the cycle an init is accepted.
  assign read_ready  = result_slot_free && !init_fire;
  assign read_fire   = read_valid && read_ready;
  assign write_ready = !init_fire;
  assign write_fire  = write_valid && write_ready;

  // The lazy scheme never needs a multi-cycle initializer.
  assign init_busy = 1'b0;

  always_ff @(posedge clk) begin
    if (rst) begin
      init_A_latched            <= 32'd0;
      context_written           <= '0;
      init_done                 <= 1'b0;
      read_result_valid         <= 1'b0;
      read_result_context_index <= 9'd0;
      read_A                    <= 32'd0;
      read_B                    <= 32'sd0;
      read_C                    <= 9'sd0;
      read_N                    <= 16'd0;
    end else begin
      // init_done pulses for exactly one cycle after an accepted init.
      init_done <= init_fire;

      if (init_fire) begin
        // Lazy initialization: remember the default A and forget all writes.
        init_A_latched  <= default_A;
        context_written <= '0;
      end

      if (write_fire) begin
        A_mem[write_context_index] <= write_A;
        B_mem[write_context_index] <= write_B;
        C_mem[write_context_index] <= write_C;
        N_mem[write_context_index] <= write_N;
        context_written[write_context_index] <= 1'b1;
      end

      if (read_fire) begin
        // A new result replaces (or fills) the result registers.
        read_result_valid         <= 1'b1;
        read_result_context_index <= read_context_index;
        if (context_written[read_context_index]) begin
          read_A <= A_mem[read_context_index];
          read_B <= B_mem[read_context_index];
          read_C <= C_mem[read_context_index];
          read_N <= N_mem[read_context_index];
        end else begin
          // Unwritten context: return the default tuple A=default, B=C=0, N=1.
          read_A <= init_A_latched;
          read_B <= 32'sd0;
          read_C <= 9'sd0;
          read_N <= 16'd1;
        end
      end else if (read_result_ready) begin
        // Result consumed (or already empty) with no replacement.
        read_result_valid <= 1'b0;
      end
    end
  end
endmodule
`default_nettype wire

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,698 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.3 context determination, Annex G.1 variables
// Figure : N/A
// Table : N/A
// Pseudocode : Quantize D1/D2/D3 into Q1/Q2/Q3 and compute context ID
// Trace : docs/jls_traceability.md#context-update
// Example : D1=22,D2=8,D3=2 with T1=3,T2=7,T3=21,NEAR=0 gives Q=4,3,1.
//
// Context quantization stage. It does not update A/B/C/N; it only computes the
// regular-mode context selector from reconstructed neighbors and forwards the
// predictor event to the later context memory/update pipeline.
`default_nettype none
module jls_gradient_quantize_one #(
  // Signed gradient width. For PIX_WIDTH=16 this is 17 bits.
  parameter int DI_WIDTH = 17
) (
  // Local gradient Di to be quantized.
  input var logic signed [DI_WIDTH-1:0] Di,
  // JPEG-LS thresholds T1 < T2 < T3 for the current NEAR.
  input var logic [15:0] T1,
  input var logic [15:0] T2,
  input var logic [15:0] T3,
  // JPEG-LS NEAR parameter for the current strip frame.
  input var logic [5:0] NEAR,
  // Quantized gradient Qi in the range -4..4.
  output logic signed [3:0] Qi
);
  // Number of zero bits needed to extend the 6-bit NEAR to DI_WIDTH.
  localparam int NEAR_PAD_WIDTH = DI_WIDTH - 6;

  // Thresholds and NEAR re-expressed as non-negative signed values of the
  // gradient width. Only the low DI_WIDTH-1 bits of each threshold are kept;
  // the original design notes state that this truncation is safe for the
  // supported precisions.
  logic signed [DI_WIDTH-1:0] t1_pos;
  logic signed [DI_WIDTH-1:0] t2_pos;
  logic signed [DI_WIDTH-1:0] t3_pos;
  logic signed [DI_WIDTH-1:0] near_pos;
  assign t1_pos   = $signed({1'b0, T1[DI_WIDTH-2:0]});
  assign t2_pos   = $signed({1'b0, T2[DI_WIDTH-2:0]});
  assign t3_pos   = $signed({1'b0, T3[DI_WIDTH-2:0]});
  assign near_pos = $signed({{NEAR_PAD_WIDTH{1'b0}}, NEAR});

  // Mirrored bounds for the negative half of the ladder.
  logic signed [DI_WIDTH-1:0] t1_neg;
  logic signed [DI_WIDTH-1:0] t2_neg;
  logic signed [DI_WIDTH-1:0] t3_neg;
  logic signed [DI_WIDTH-1:0] near_neg;
  assign t1_neg   = -t1_pos;
  assign t2_neg   = -t2_pos;
  assign t3_neg   = -t3_pos;
  assign near_neg = -near_pos;

  // Priority ladder from most negative to most positive region. Each test
  // assumes all earlier tests failed, so the order is significant.
  always_comb begin
    if (Di <= t3_neg) begin
      Qi = -4'sd4;
    end else if (Di <= t2_neg) begin
      Qi = -4'sd3;
    end else if (Di <= t1_neg) begin
      Qi = -4'sd2;
    end else if (Di < near_neg) begin
      Qi = -4'sd1;
    end else if (Di <= near_pos) begin
      Qi = 4'sd0;
    end else if (Di < t1_pos) begin
      Qi = 4'sd1;
    end else if (Di < t2_pos) begin
      Qi = 4'sd2;
    end else if (Di < t3_pos) begin
      Qi = 4'sd3;
    end else begin
      Qi = 4'sd4;
    end
  end
endmodule
// Standard   : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause     : Annex A.3 gradient quantization and context index
// Example    : Q1=-1, Q2=2, Q3=0 gives (-1*9+2)*9+0 = -63, so the module
//              reports context_index=63 with context_negative=1.
//
// Context quantizer. A predicted pixel event is captured together with the
// strip thresholds, its three local gradients are quantized by
// jls_gradient_quantize_one, and the signed context value
// (Q1*9+Q2)*9+Q3 is folded into an absolute index plus a sign flag.
// Every event passes through three register ranks (stage_*, q_stage_*,
// context_*). One-entry skid slots on the input and output sides keep
// predict_ready and the internal clock enables independent of context_ready.
module jls_context_quantizer #(
  // Grayscale sample precision chosen at compile time. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16
) (
  // Main 250 MHz clock.
  input var logic clk,
  // Active-high reset, sampled synchronously.
  input var logic rst,
  // Upstream jls_predictor presents a valid predicted event.
  input var logic predict_valid,
  // High while the local input skid slot is empty.
  output logic predict_ready,
  // Original input sample X travelling with the event.
  input var logic [PIX_WIDTH-1:0] predict_sample,
  // Pixel column coordinate travelling with the event.
  input var logic [12:0] predict_x,
  // Pixel row coordinate travelling with the event.
  input var logic [12:0] predict_y,
  // Marks the first pixel of a strip (strip-local state reset).
  input var logic predict_strip_first_pixel,
  // Marks the last pixel of a strip (strip-local flush handling).
  input var logic predict_strip_last_pixel,
  // Reconstructed neighbor samples of the current pixel.
  input var logic [PIX_WIDTH-1:0] Ra,
  input var logic [PIX_WIDTH-1:0] Rb,
  input var logic [PIX_WIDTH-1:0] Rc,
  input var logic [PIX_WIDTH-1:0] Rd,
  // MED prediction value Px.
  input var logic [PIX_WIDTH-1:0] Px,
  // Thresholds and NEAR for the current strip frame, captured per event.
  input var logic [15:0] T1,
  input var logic [15:0] T2,
  input var logic [15:0] T3,
  input var logic [5:0] NEAR,
  // A quantized context event is available on the context_* outputs.
  output logic context_valid,
  // Downstream context memory/error stage takes the event this cycle.
  input var logic context_ready,
  // Original input sample X of the output event.
  output logic [PIX_WIDTH-1:0] context_sample,
  // Pixel column coordinate of the output event.
  output logic [12:0] context_x,
  // Pixel row coordinate of the output event.
  output logic [12:0] context_y,
  // Strip boundary flags of the output event.
  output logic context_strip_first_pixel,
  output logic context_strip_last_pixel,
  // Prediction value and neighbors of the output event.
  output logic [PIX_WIDTH-1:0] context_Px,
  output logic [PIX_WIDTH-1:0] context_Ra,
  output logic [PIX_WIDTH-1:0] context_Rb,
  output logic [PIX_WIDTH-1:0] context_Rc,
  output logic [PIX_WIDTH-1:0] context_Rd,
  // Quantized gradients Q1/Q2/Q3 as produced by the quantizers (not sign-flipped).
  output logic signed [3:0] Q1,
  output logic signed [3:0] Q2,
  output logic signed [3:0] Q3,
  // Magnitude of the signed context value, 0..364.
  output logic [8:0] context_index,
  // High when the signed context value (Q1*9+Q2)*9+Q3 is below zero.
  output logic context_negative,
  // High when Q1, Q2 and Q3 are all zero (JPEG-LS run mode).
  output logic run_mode_context
);

  // One extra bit so the difference of two PIX_WIDTH-bit samples stays signed.
  localparam int DI_WIDTH = PIX_WIDTH + 1;

  // ---------------------------------------------------------------------
  // Signal declarations
  // ---------------------------------------------------------------------

  // Local gradients computed from the stage_* neighbors.
  logic signed [DI_WIDTH-1:0] D1, D2, D3;

  // Input register rank. Thresholds and NEAR are latched alongside the pixel
  // event so the compare tree only sees registered operands.
  logic                 stage_valid;
  logic [PIX_WIDTH-1:0] stage_sample;
  logic [12:0]          stage_x, stage_y;
  logic                 stage_strip_first_pixel, stage_strip_last_pixel;
  logic [PIX_WIDTH-1:0] stage_Px, stage_Ra, stage_Rb, stage_Rc, stage_Rd;
  logic [15:0]          stage_T1, stage_T2, stage_T3;
  logic [5:0]           stage_NEAR;

  // Input skid slot. Holds one accepted event while stage_* is occupied and
  // cannot advance; it is moved into stage_* when stage_* drains.
  logic                 stage_next_valid;
  logic [PIX_WIDTH-1:0] stage_next_sample;
  logic [12:0]          stage_next_x, stage_next_y;
  logic                 stage_next_strip_first_pixel, stage_next_strip_last_pixel;
  logic [PIX_WIDTH-1:0] stage_next_Px, stage_next_Ra, stage_next_Rb, stage_next_Rc, stage_next_Rd;
  logic [15:0]          stage_next_T1, stage_next_T2, stage_next_T3;
  logic [5:0]           stage_next_NEAR;

  // Quantized-gradient register rank. Separates the threshold compares from
  // the context-index adders.
  logic                 q_stage_valid;
  logic [PIX_WIDTH-1:0] q_stage_sample;
  logic [12:0]          q_stage_x, q_stage_y;
  logic                 q_stage_strip_first_pixel, q_stage_strip_last_pixel;
  logic [PIX_WIDTH-1:0] q_stage_Px, q_stage_Ra, q_stage_Rb, q_stage_Rc, q_stage_Rd;
  logic signed [3:0]    q_stage_Q1, q_stage_Q2, q_stage_Q3;

  // Output skid slot. q_stage_* may retire into it while the public output
  // register is still waiting for context_ready.
  logic                 context_next_valid;
  logic [PIX_WIDTH-1:0] context_next_sample;
  logic [12:0]          context_next_x, context_next_y;
  logic                 context_next_strip_first_pixel, context_next_strip_last_pixel;
  logic [PIX_WIDTH-1:0] context_next_Px, context_next_Ra, context_next_Rb, context_next_Rc, context_next_Rd;
  logic signed [3:0]    context_next_Q1, context_next_Q2, context_next_Q3;
  logic [8:0]           context_next_index;
  logic                 context_next_negative;
  logic                 context_next_run_mode;

  // Quantizer outputs for the event currently held in stage_*.
  logic signed [3:0] q1_next, q2_next, q3_next;

  // Terms of the signed context value (Q1*9 + Q2)*9 + Q3.
  logic signed [9:0] q1_ext, q2_ext, q3_ext;
  logic signed [9:0] q1_times_81, q2_times_9;
  logic signed [9:0] context_value_next, context_abs_next;
  logic              context_negative_next;
  logic              run_mode_context_next;

  // Handshake / transfer strobes.
  logic context_accept;
  logic context_direct_from_q;
  logic context_store_next;
  logic context_promote_next;
  logic q_stage_to_output;
  logic q_stage_open;
  logic stage_to_q;
  logic accept_predict;
  logic stage_load_predict;
  logic stage_store_next;
  logic stage_promote_next;

  // ---------------------------------------------------------------------
  // Gradient computation and quantization (operates on stage_*)
  // ---------------------------------------------------------------------

  // Zero-extend each sample by one bit before subtracting so the result is a
  // correctly signed DI_WIDTH-bit difference.
  assign D1 = $signed({1'b0, stage_Rd}) - $signed({1'b0, stage_Rb});
  assign D2 = $signed({1'b0, stage_Rb}) - $signed({1'b0, stage_Rc});
  assign D3 = $signed({1'b0, stage_Rc}) - $signed({1'b0, stage_Ra});

  jls_gradient_quantize_one #(.DI_WIDTH(DI_WIDTH)) q1_quantizer (
    .Di  (D1),
    .T1  (stage_T1),
    .T2  (stage_T2),
    .T3  (stage_T3),
    .NEAR(stage_NEAR),
    .Qi  (q1_next)
  );

  jls_gradient_quantize_one #(.DI_WIDTH(DI_WIDTH)) q2_quantizer (
    .Di  (D2),
    .T1  (stage_T1),
    .T2  (stage_T2),
    .T3  (stage_T3),
    .NEAR(stage_NEAR),
    .Qi  (q2_next)
  );

  jls_gradient_quantize_one #(.DI_WIDTH(DI_WIDTH)) q3_quantizer (
    .Di  (D3),
    .T1  (stage_T1),
    .T2  (stage_T2),
    .T3  (stage_T3),
    .NEAR(stage_NEAR),
    .Qi  (q3_next)
  );

  // ---------------------------------------------------------------------
  // Context index arithmetic (operates on q_stage_*)
  // ---------------------------------------------------------------------

  // Sign-extend the 4-bit quantized gradients to the 10-bit working width.
  assign q1_ext = {{6{q_stage_Q1[3]}}, q_stage_Q1};
  assign q2_ext = {{6{q_stage_Q2[3]}}, q_stage_Q2};
  assign q3_ext = {{6{q_stage_Q3[3]}}, q_stage_Q3};

  // Constant multiplies as shift-add: 81 = 64 + 16 + 1, 9 = 8 + 1.
  assign q1_times_81 = (q1_ext <<< 6) + (q1_ext <<< 4) + q1_ext;
  assign q2_times_9  = (q2_ext <<< 3) + q2_ext;

  assign context_value_next = q1_times_81 + q2_times_9 + q3_ext;

  // The MSB of the two's-complement value is set exactly when it is below zero.
  assign context_negative_next = context_value_next[9];

  // Magnitude of the context value; only bits [8:0] are stored.
  assign context_abs_next = context_negative_next ? -context_value_next
                                                  : context_value_next;

  // Run mode is selected when every quantized gradient is zero.
  assign run_mode_context_next = ({q_stage_Q1, q_stage_Q2, q_stage_Q3} == 12'd0);

  // ---------------------------------------------------------------------
  // Handshake
  // ---------------------------------------------------------------------

  // Output side: the public register is consumed on valid && ready.
  assign context_accept = context_valid && context_ready;

  // q_stage_* may leave whenever the output skid slot is empty.
  assign q_stage_to_output = q_stage_valid && !context_next_valid;

  // A leaving q_stage event goes straight to the public register when that
  // register is free (or freed this cycle), otherwise into the skid slot.
  assign context_direct_from_q = q_stage_to_output && (!context_valid || context_accept);
  assign context_store_next    = q_stage_to_output && context_valid && !context_accept;

  // A parked skid entry moves to the public register as soon as it is free.
  assign context_promote_next  = context_next_valid && (!context_valid || context_accept);

  // q_stage_* can be (re)loaded when empty or when its content leaves now.
  assign q_stage_open = !q_stage_valid || q_stage_to_output;
  assign stage_to_q   = stage_valid && q_stage_open;

  // Input side: readiness reflects only the local input skid slot.
  assign predict_ready  = !stage_next_valid;
  assign accept_predict = predict_valid && predict_ready;

  // An accepted event lands in stage_* when that rank is empty or draining,
  // otherwise it is parked in the input skid slot.
  assign stage_load_predict = accept_predict && (!stage_valid || stage_to_q);
  assign stage_store_next   = accept_predict && stage_valid && !stage_to_q;

  // A parked input entry refills stage_* in the cycle stage_* drains.
  assign stage_promote_next = stage_to_q && stage_next_valid;

  // ---------------------------------------------------------------------
  // Input rank and input skid slot
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin
    if (rst) begin
      stage_valid             <= 1'b0;
      stage_sample            <= '0;
      stage_x                 <= '0;
      stage_y                 <= '0;
      stage_strip_first_pixel <= 1'b0;
      stage_strip_last_pixel  <= 1'b0;
      stage_Px                <= '0;
      stage_Ra                <= '0;
      stage_Rb                <= '0;
      stage_Rc                <= '0;
      stage_Rd                <= '0;
      stage_T1                <= '0;
      stage_T2                <= '0;
      stage_T3                <= '0;
      stage_NEAR              <= '0;

      stage_next_valid             <= 1'b0;
      stage_next_sample            <= '0;
      stage_next_x                 <= '0;
      stage_next_y                 <= '0;
      stage_next_strip_first_pixel <= 1'b0;
      stage_next_strip_last_pixel  <= 1'b0;
      stage_next_Px                <= '0;
      stage_next_Ra                <= '0;
      stage_next_Rb                <= '0;
      stage_next_Rc                <= '0;
      stage_next_Rd                <= '0;
      stage_next_T1                <= '0;
      stage_next_T2                <= '0;
      stage_next_T3                <= '0;
      stage_next_NEAR              <= '0;
    end else begin
      // stage_load_predict and stage_promote_next cannot coincide: loading
      // needs an accepted event (skid empty), promoting needs the skid full.
      if (stage_load_predict) begin
        stage_valid             <= 1'b1;
        stage_sample            <= predict_sample;
        stage_x                 <= predict_x;
        stage_y                 <= predict_y;
        stage_strip_first_pixel <= predict_strip_first_pixel;
        stage_strip_last_pixel  <= predict_strip_last_pixel;
        stage_Px                <= Px;
        stage_Ra                <= Ra;
        stage_Rb                <= Rb;
        stage_Rc                <= Rc;
        stage_Rd                <= Rd;
        stage_T1                <= T1;
        stage_T2                <= T2;
        stage_T3                <= T3;
        stage_NEAR              <= NEAR;
      end else if (stage_promote_next) begin
        stage_valid             <= 1'b1;
        stage_sample            <= stage_next_sample;
        stage_x                 <= stage_next_x;
        stage_y                 <= stage_next_y;
        stage_strip_first_pixel <= stage_next_strip_first_pixel;
        stage_strip_last_pixel  <= stage_next_strip_last_pixel;
        stage_Px                <= stage_next_Px;
        stage_Ra                <= stage_next_Ra;
        stage_Rb                <= stage_next_Rb;
        stage_Rc                <= stage_next_Rc;
        stage_Rd                <= stage_next_Rd;
        stage_T1                <= stage_next_T1;
        stage_T2                <= stage_next_T2;
        stage_T3                <= stage_next_T3;
        stage_NEAR              <= stage_next_NEAR;
        stage_next_valid        <= 1'b0;
      end else if (stage_to_q) begin
        // Drained with nothing to refill from.
        stage_valid <= 1'b0;
      end

      // Park an accepted event that could not enter stage_* this cycle.
      if (stage_store_next) begin
        stage_next_valid             <= 1'b1;
        stage_next_sample            <= predict_sample;
        stage_next_x                 <= predict_x;
        stage_next_y                 <= predict_y;
        stage_next_strip_first_pixel <= predict_strip_first_pixel;
        stage_next_strip_last_pixel  <= predict_strip_last_pixel;
        stage_next_Px                <= Px;
        stage_next_Ra                <= Ra;
        stage_next_Rb                <= Rb;
        stage_next_Rc                <= Rc;
        stage_next_Rd                <= Rd;
        stage_next_T1                <= T1;
        stage_next_T2                <= T2;
        stage_next_T3                <= T3;
        stage_next_NEAR              <= NEAR;
      end
    end
  end

  // ---------------------------------------------------------------------
  // Quantized-gradient rank
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin
    if (rst) begin
      q_stage_valid             <= 1'b0;
      q_stage_sample            <= '0;
      q_stage_x                 <= '0;
      q_stage_y                 <= '0;
      q_stage_strip_first_pixel <= 1'b0;
      q_stage_strip_last_pixel  <= 1'b0;
      q_stage_Px                <= '0;
      q_stage_Ra                <= '0;
      q_stage_Rb                <= '0;
      q_stage_Rc                <= '0;
      q_stage_Rd                <= '0;
      q_stage_Q1                <= '0;
      q_stage_Q2                <= '0;
      q_stage_Q3                <= '0;
    end else if (stage_to_q) begin
      // Capture the stage_* event together with its freshly quantized gradients.
      q_stage_valid             <= 1'b1;
      q_stage_sample            <= stage_sample;
      q_stage_x                 <= stage_x;
      q_stage_y                 <= stage_y;
      q_stage_strip_first_pixel <= stage_strip_first_pixel;
      q_stage_strip_last_pixel  <= stage_strip_last_pixel;
      q_stage_Px                <= stage_Px;
      q_stage_Ra                <= stage_Ra;
      q_stage_Rb                <= stage_Rb;
      q_stage_Rc                <= stage_Rc;
      q_stage_Rd                <= stage_Rd;
      q_stage_Q1                <= q1_next;
      q_stage_Q2                <= q2_next;
      q_stage_Q3                <= q3_next;
    end else if (q_stage_to_output) begin
      // Content left and nothing arrived to replace it.
      q_stage_valid <= 1'b0;
    end
  end

  // ---------------------------------------------------------------------
  // Public output register and output skid slot
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin
    if (rst) begin
      context_next_valid             <= 1'b0;
      context_next_sample            <= '0;
      context_next_x                 <= '0;
      context_next_y                 <= '0;
      context_next_strip_first_pixel <= 1'b0;
      context_next_strip_last_pixel  <= 1'b0;
      context_next_Px                <= '0;
      context_next_Ra                <= '0;
      context_next_Rb                <= '0;
      context_next_Rc                <= '0;
      context_next_Rd                <= '0;
      context_next_Q1                <= '0;
      context_next_Q2                <= '0;
      context_next_Q3                <= '0;
      context_next_index             <= '0;
      context_next_negative          <= 1'b0;
      context_next_run_mode          <= 1'b0;

      context_valid             <= 1'b0;
      context_sample            <= '0;
      context_x                 <= '0;
      context_y                 <= '0;
      context_strip_first_pixel <= 1'b0;
      context_strip_last_pixel  <= 1'b0;
      context_Px                <= '0;
      context_Ra                <= '0;
      context_Rb                <= '0;
      context_Rc                <= '0;
      context_Rd                <= '0;
      Q1                        <= '0;
      Q2                        <= '0;
      Q3                        <= '0;
      context_index             <= '0;
      context_negative          <= 1'b0;
      run_mode_context          <= 1'b0;
    end else begin
      // context_promote_next needs the skid slot full while
      // context_direct_from_q needs it empty, so at most one branch refills
      // the public register; otherwise an accepted event simply leaves.
      if (context_promote_next) begin
        context_valid             <= 1'b1;
        context_sample            <= context_next_sample;
        context_x                 <= context_next_x;
        context_y                 <= context_next_y;
        context_strip_first_pixel <= context_next_strip_first_pixel;
        context_strip_last_pixel  <= context_next_strip_last_pixel;
        context_Px                <= context_next_Px;
        context_Ra                <= context_next_Ra;
        context_Rb                <= context_next_Rb;
        context_Rc                <= context_next_Rc;
        context_Rd                <= context_next_Rd;
        Q1                        <= context_next_Q1;
        Q2                        <= context_next_Q2;
        Q3                        <= context_next_Q3;
        context_index             <= context_next_index;
        context_negative          <= context_next_negative;
        run_mode_context          <= context_next_run_mode;
        context_next_valid        <= 1'b0;
      end else if (context_direct_from_q) begin
        context_valid             <= 1'b1;
        context_sample            <= q_stage_sample;
        context_x                 <= q_stage_x;
        context_y                 <= q_stage_y;
        context_strip_first_pixel <= q_stage_strip_first_pixel;
        context_strip_last_pixel  <= q_stage_strip_last_pixel;
        context_Px                <= q_stage_Px;
        context_Ra                <= q_stage_Ra;
        context_Rb                <= q_stage_Rb;
        context_Rc                <= q_stage_Rc;
        context_Rd                <= q_stage_Rd;
        Q1                        <= q_stage_Q1;
        Q2                        <= q_stage_Q2;
        Q3                        <= q_stage_Q3;
        context_index             <= context_abs_next[8:0];
        context_negative          <= context_negative_next;
        run_mode_context          <= run_mode_context_next;
      end else if (context_accept) begin
        context_valid <= 1'b0;
      end

      // Park the q_stage event when the public register is still occupied.
      if (context_store_next) begin
        context_next_valid             <= 1'b1;
        context_next_sample            <= q_stage_sample;
        context_next_x                 <= q_stage_x;
        context_next_y                 <= q_stage_y;
        context_next_strip_first_pixel <= q_stage_strip_first_pixel;
        context_next_strip_last_pixel  <= q_stage_strip_last_pixel;
        context_next_Px                <= q_stage_Px;
        context_next_Ra                <= q_stage_Ra;
        context_next_Rb                <= q_stage_Rb;
        context_next_Rc                <= q_stage_Rc;
        context_next_Rd                <= q_stage_Rd;
        context_next_Q1                <= q_stage_Q1;
        context_next_Q2                <= q_stage_Q2;
        context_next_Q3                <= q_stage_Q3;
        context_next_index             <= context_abs_next[8:0];
        context_next_negative          <= context_negative_next;
        context_next_run_mode          <= run_mode_context_next;
      end
    end
  end

endmodule
`default_nettype wire

View File

@@ -0,0 +1,960 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.5 Golomb parameter, Annex A.6 variables update
// Figure : N/A
// Table : N/A
// Pseudocode : Compute k and update regular-mode A/B/C/N
// Trace : docs/jls_traceability.md#context-update
// Example : A=4,N=1,Errval=3 gives k=2 before A is updated to 7.
//
// Pipelined single-context update arithmetic. Stage 0 captures the Annex A.6
// input context. Stage 1 registers the odd-scale multiplier operands for
// Errval*(2*NEAR+1) and carries the Annex A.5 Golomb k decision. Stage 2
// captures the scaled product. Stage 3 registers the accumulated B[Q]. A
// pre-bias stage then holds the variables after RESET folding, and a bias
// stage holds the corrected B[Q]/C[Q] ahead of the result registers. The
// split keeps NEAR-driven arithmetic short at the 250 MHz target.
`default_nettype none
module jls_context_update (
// Main 250 MHz clock.
input var logic clk,
// Synchronous active-high reset.
input var logic rst,
// Input update event is valid.
input var logic update_valid,
// This stage can accept the update event.
output logic update_ready,
// Standard context variables before update.
input var logic [31:0] A_in,
input var logic signed [31:0] B_in,
input var logic signed [8:0] C_in,
input var logic [15:0] N_in,
// Quantized prediction error Errval for this context.
input var logic signed [31:0] Errval,
// Context and strip metadata forwarded with Errval.
input var logic [8:0] context_index_in,
input var logic strip_last_pixel_in,
// Coding parameters forwarded for the Golomb encoder.
input var logic [4:0] qbpp_in,
input var logic [6:0] LIMIT_in,
// JPEG-LS NEAR parameter for this strip.
input var logic [5:0] NEAR,
// JPEG-LS RESET parameter, normally 64.
input var logic [15:0] RESET,
// Output updated context event is valid.
output logic result_valid,
// Downstream context table accepted this result.
input var logic result_ready,
// Golomb parameter computed from A_in and N_in before the update.
output logic [4:0] k,
// Forwarded Errval for the downstream error mapper.
output logic signed [31:0] Errval_out,
// Forwarded context and strip metadata.
output logic [8:0] context_index_out,
output logic strip_last_pixel_out,
// Forwarded coding parameters.
output logic [4:0] qbpp_out,
output logic [6:0] LIMIT_out,
// High when get_error_correction(k | NEAR) requests mapping inversion.
output logic map_invert,
// Standard context variables after update.
output logic [31:0] A_out,
output logic signed [31:0] B_out,
output logic signed [8:0] C_out,
output logic [15:0] N_out
);
// Signed and absolute forms of Errval.
logic signed [32:0] Errval_ext;
logic [32:0] abs_Errval_ext;
// Stage-1 update terms from Annex A.6.
logic signed [7:0] near_scale;
logic signed [40:0] B_delta;
logic [31:0] A_accum_next;
logic signed [40:0] B_accum_next;
logic [15:0] N_halved_plus_one_next;
// Golomb parameter compare terms from A_in and N_in.
logic [31:0] N_shift_0;
logic [31:0] N_shift_1;
logic [31:0] N_shift_2;
logic [31:0] N_shift_3;
logic [31:0] N_shift_4;
logic [31:0] N_shift_5;
logic [31:0] N_shift_6;
logic [31:0] N_shift_7;
logic [31:0] N_shift_8;
logic [31:0] N_shift_9;
logic [31:0] N_shift_10;
logic [31:0] N_shift_11;
logic [31:0] N_shift_12;
logic [31:0] N_shift_13;
logic [31:0] N_shift_14;
logic [31:0] N_shift_15;
logic [31:0] N_shift_16;
logic [4:0] k_next;
logic k_or_near_is_zero;
logic signed [32:0] map_bias_check;
logic map_invert_next;
// Stage-0 registered input payload. These are raw standard variables from
// the context table and regular-mode Errval path.
logic s0_valid;
logic [31:0] s0_A_in;
logic signed [31:0] s0_B_in;
logic signed [8:0] s0_C_in;
logic [15:0] s0_N_in;
logic signed [31:0] s0_Errval;
logic [8:0] s0_context_index;
logic s0_strip_last_pixel;
logic [4:0] s0_qbpp;
logic [6:0] s0_LIMIT;
logic [5:0] s0_NEAR;
logic [15:0] s0_RESET;
// One-entry input skid slot. It keeps update_ready dependent only on local
// queue fullness instead of the result_ready/context-write/error-map chain.
// Example: if stage 0 is blocked for one cycle, the next regular Errval can
// be captured here without propagating downstream backpressure to the
// regular error quantizer CE path.
logic update_next_valid;
logic [31:0] update_next_A_in;
logic signed [31:0] update_next_B_in;
logic signed [8:0] update_next_C_in;
logic [15:0] update_next_N_in;
logic signed [31:0] update_next_Errval;
logic [8:0] update_next_context_index;
logic update_next_strip_last_pixel;
logic [4:0] update_next_qbpp;
logic [6:0] update_next_LIMIT;
logic [5:0] update_next_NEAR;
logic [15:0] update_next_RESET;
// Stage-1 registered multiplier operands/update payload. s1_Errval_ext and
// s1_near_scale are the registered odd-scale multiply operands for the
// Annex A.6 Errval*(2*NEAR+1) term.
logic s1_valid;
logic [31:0] s1_A_accum;
logic signed [31:0] s1_B_in;
logic signed [8:0] s1_C_in;
logic [15:0] s1_N_in;
logic [15:0] s1_N_halved_plus_one;
logic [15:0] s1_RESET;
logic [4:0] s1_k;
logic signed [31:0] s1_Errval;
logic [8:0] s1_context_index;
logic s1_strip_last_pixel;
logic [4:0] s1_qbpp;
logic [6:0] s1_LIMIT;
logic s1_map_invert;
logic signed [32:0] s1_Errval_ext;
logic signed [7:0] s1_near_scale;
// Stage-2 registered product/update payload. s2_B_delta is the registered
// scaled Errval term for the Annex A.6 B[Q] update before the following
// carry-chain add.
logic s2_valid;
logic [31:0] s2_A_accum;
logic signed [31:0] s2_B_in;
logic signed [40:0] s2_B_delta;
logic signed [8:0] s2_C_in;
logic [15:0] s2_N_in;
logic [15:0] s2_N_halved_plus_one;
logic [15:0] s2_RESET;
logic [4:0] s2_k;
logic signed [31:0] s2_Errval;
logic [8:0] s2_context_index;
logic s2_strip_last_pixel;
logic [4:0] s2_qbpp;
logic [6:0] s2_LIMIT;
logic s2_map_invert;
// Registered stage-3 payload. These names track the standard A/B/C/N and
// Errval variables so the implementation can be compared with Annex A.6.
logic stage_valid;
logic [31:0] stage_A_accum;
logic signed [40:0] stage_B_accum;
logic signed [8:0] stage_C_in;
logic [15:0] stage_N_in;
logic [15:0] stage_N_halved_plus_one;
logic [15:0] stage_RESET;
logic [4:0] stage_k;
logic signed [31:0] stage_Errval;
logic [8:0] stage_context_index;
logic stage_strip_last_pixel;
logic [4:0] stage_qbpp;
logic [6:0] stage_LIMIT;
logic stage_map_invert;
// Registered pre-bias stage. This stage holds the Annex A.6 variables after
// RESET folding but before B[Q]/C[Q] bias correction.
logic prebias_valid;
logic [31:0] prebias_A_after_reset;
logic signed [40:0] prebias_B_after_reset;
logic signed [8:0] prebias_C_in;
logic [15:0] prebias_N_after_increment;
logic [4:0] prebias_k;
logic signed [31:0] prebias_Errval;
logic [8:0] prebias_context_index;
logic prebias_strip_last_pixel;
logic [4:0] prebias_qbpp;
logic [6:0] prebias_LIMIT;
logic prebias_map_invert;
// Registered bias-correction stage. The corrected B[Q]/C[Q] values are
// stored here so the public result path does not have to re-run the bias
// compare/add/saturate logic in the same cycle.
logic bias_valid;
logic [31:0] bias_A_after_reset;
logic signed [31:0] bias_B_after_bias;
logic signed [8:0] bias_C_after_bias;
logic [15:0] bias_N_after_increment;
logic [4:0] bias_k;
logic signed [31:0] bias_Errval;
logic [8:0] bias_context_index;
logic bias_strip_last_pixel;
logic [4:0] bias_qbpp;
logic [6:0] bias_LIMIT;
logic bias_map_invert;
// One-entry output skid slot. Stage 3 may retire into this slot when the
// public result register is still waiting for context writeback or error
// mapper acceptance. This breaks the downstream ready chain from feeding
// back through every context-update pipeline CE in one 250 MHz cycle.
logic result_next_valid;
logic [4:0] result_next_k;
logic signed [31:0] result_next_Errval_out;
logic [8:0] result_next_context_index_out;
logic result_next_strip_last_pixel_out;
logic [4:0] result_next_qbpp_out;
logic [6:0] result_next_LIMIT_out;
logic result_next_map_invert;
logic [31:0] result_next_A_out;
logic signed [31:0] result_next_B_out;
logic signed [8:0] result_next_C_out;
logic [15:0] result_next_N_out;
// Stage-1 RESET and bias-update intermediates.
logic [31:0] A_after_reset;
logic signed [40:0] B_after_reset;
logic [15:0] N_after_increment;
logic signed [40:0] bias_stage_N_signed_ext;
logic signed [40:0] bias_stage_negative_N_plus_one;
logic signed [40:0] bias_stage_B_plus_N;
logic signed [40:0] bias_stage_B_minus_N;
logic signed [40:0] bias_stage_B_after_bias;
logic signed [8:0] bias_stage_C_after_bias;
// Handshake terms.
logic result_slot_open;
logic bias_to_result;
logic bias_open;
logic prebias_to_bias;
logic prebias_open;
logic stage_to_prebias;
logic stage_open;
logic s2_to_stage;
logic s2_open;
logic s1_to_s2;
logic s1_open;
logic s0_to_s1;
logic s0_open;
logic accept_update;
logic update_load_input;
logic update_store_next;
logic s0_promote_next;
logic result_accept;
logic result_direct_from_bias;
logic result_store_next;
logic result_promote_next;
// Shared narrow-scale multiplier for Annex A.6 Errval*(2*NEAR+1).
jls_near_scale_mul #(
.INPUT_WIDTH(33),
.OUTPUT_WIDTH(41)
) context_update_near_scale_mul_i (
.multiplicand_i(s1_Errval_ext),
.near_scale_i(s1_near_scale[5:0]),
.product_o(B_delta)
);
always_comb begin
Errval_ext = {s0_Errval[31], s0_Errval};
end
always_comb begin
abs_Errval_ext = Errval_ext[32:0];
if (Errval_ext < 33'sd0) begin
abs_Errval_ext = -Errval_ext;
end
end
always_comb begin
near_scale = $signed({1'b0, s0_NEAR, 1'b1});
end
always_comb begin
A_accum_next = s0_A_in + abs_Errval_ext[31:0];
B_accum_next = {{9{s2_B_in[31]}}, s2_B_in} + s2_B_delta;
N_halved_plus_one_next = (s0_N_in >> 1) + 16'd1;
end
always_comb begin
N_shift_0 = {16'd0, s0_N_in};
N_shift_1 = {15'd0, s0_N_in, 1'd0};
N_shift_2 = {14'd0, s0_N_in, 2'd0};
N_shift_3 = {13'd0, s0_N_in, 3'd0};
N_shift_4 = {12'd0, s0_N_in, 4'd0};
N_shift_5 = {11'd0, s0_N_in, 5'd0};
N_shift_6 = {10'd0, s0_N_in, 6'd0};
N_shift_7 = {9'd0, s0_N_in, 7'd0};
N_shift_8 = {8'd0, s0_N_in, 8'd0};
N_shift_9 = {7'd0, s0_N_in, 9'd0};
N_shift_10 = {6'd0, s0_N_in, 10'd0};
N_shift_11 = {5'd0, s0_N_in, 11'd0};
N_shift_12 = {4'd0, s0_N_in, 12'd0};
N_shift_13 = {3'd0, s0_N_in, 13'd0};
N_shift_14 = {2'd0, s0_N_in, 14'd0};
N_shift_15 = {1'd0, s0_N_in, 15'd0};
N_shift_16 = {s0_N_in, 16'd0};
end
always_comb begin
k_next = 5'd16;
case (1'b1)
(N_shift_0 >= s0_A_in): begin
k_next = 5'd0;
end
(N_shift_1 >= s0_A_in): begin
k_next = 5'd1;
end
(N_shift_2 >= s0_A_in): begin
k_next = 5'd2;
end
(N_shift_3 >= s0_A_in): begin
k_next = 5'd3;
end
(N_shift_4 >= s0_A_in): begin
k_next = 5'd4;
end
(N_shift_5 >= s0_A_in): begin
k_next = 5'd5;
end
(N_shift_6 >= s0_A_in): begin
k_next = 5'd6;
end
(N_shift_7 >= s0_A_in): begin
k_next = 5'd7;
end
(N_shift_8 >= s0_A_in): begin
k_next = 5'd8;
end
(N_shift_9 >= s0_A_in): begin
k_next = 5'd9;
end
(N_shift_10 >= s0_A_in): begin
k_next = 5'd10;
end
(N_shift_11 >= s0_A_in): begin
k_next = 5'd11;
end
(N_shift_12 >= s0_A_in): begin
k_next = 5'd12;
end
(N_shift_13 >= s0_A_in): begin
k_next = 5'd13;
end
(N_shift_14 >= s0_A_in): begin
k_next = 5'd14;
end
(N_shift_15 >= s0_A_in): begin
k_next = 5'd15;
end
default: begin
k_next = 5'd16;
end
endcase
end
always_comb begin
k_or_near_is_zero = 1'b0;
if (k_next == 5'd0 && s0_NEAR == 6'd0) begin
k_or_near_is_zero = 1'b1;
end
end
always_comb begin
map_bias_check = {s0_B_in[31], s0_B_in} + {s0_B_in[31], s0_B_in} + $signed({17'd0, s0_N_in}) - 33'sd1;
end
always_comb begin
map_invert_next = 1'b0;
if (k_or_near_is_zero && map_bias_check < 33'sd0) begin
map_invert_next = 1'b1;
end
end
always_comb begin
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.6 variables update
// Pseudocode : If N[Q] == RESET then halve A[Q], B[Q], and N[Q].
A_after_reset = stage_A_accum;
B_after_reset = stage_B_accum;
N_after_increment = stage_N_in + 16'd1;
if (stage_N_in == stage_RESET) begin
A_after_reset = stage_A_accum >> 1;
B_after_reset = stage_B_accum >>> 1;
N_after_increment = stage_N_halved_plus_one;
end
end
always_comb begin
bias_stage_N_signed_ext = $signed({25'd0, prebias_N_after_increment});
bias_stage_negative_N_plus_one = -bias_stage_N_signed_ext + 41'sd1;
bias_stage_B_plus_N = prebias_B_after_reset + bias_stage_N_signed_ext;
bias_stage_B_minus_N = prebias_B_after_reset - bias_stage_N_signed_ext;
end
always_comb begin
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.6 variables update
// Pseudocode : Bias correction for B[Q] and C[Q].
// Stage note : Compute the corrected B[Q]/C[Q] from the registered
// pre-bias state so RESET folding and bias correction do not sit in the
// same timing cone.
bias_stage_B_after_bias = prebias_B_after_reset;
bias_stage_C_after_bias = prebias_C_in;
case (1'b1)
(bias_stage_B_plus_N <= 41'sd0): begin
bias_stage_B_after_bias = bias_stage_B_plus_N;
if (bias_stage_B_plus_N <= -bias_stage_N_signed_ext) begin
bias_stage_B_after_bias = bias_stage_negative_N_plus_one;
end
if (prebias_C_in > -9'sd128) begin
bias_stage_C_after_bias = prebias_C_in - 9'sd1;
end
end
(prebias_B_after_reset > 41'sd0): begin
bias_stage_B_after_bias = bias_stage_B_minus_N;
if (bias_stage_B_minus_N > 41'sd0) begin
bias_stage_B_after_bias = 41'sd0;
end
if (prebias_C_in < 9'sd127) begin
bias_stage_C_after_bias = prebias_C_in + 9'sd1;
end
end
default: begin
bias_stage_B_after_bias = prebias_B_after_reset;
bias_stage_C_after_bias = prebias_C_in;
end
endcase
end
always_comb begin
result_accept = 1'b0;
if (result_valid && result_ready) begin
result_accept = 1'b1;
end
end
always_comb begin
// The bias stage can retire whenever the local second output slot is free.
// It does not need to see result_ready from the downstream context
// write/error mapper path in the same cycle.
result_slot_open = !result_next_valid;
end
always_comb begin
bias_to_result = 1'b0;
if (bias_valid && result_slot_open) begin
bias_to_result = 1'b1;
end
end
always_comb begin
bias_open = 1'b0;
if (!bias_valid || bias_to_result) begin
bias_open = 1'b1;
end
end
always_comb begin
prebias_to_bias = 1'b0;
if (prebias_valid && bias_open) begin
prebias_to_bias = 1'b1;
end
end
always_comb begin
prebias_open = 1'b0;
if (!prebias_valid || prebias_to_bias) begin
prebias_open = 1'b1;
end
end
always_comb begin
stage_to_prebias = 1'b0;
if (stage_valid && prebias_open) begin
stage_to_prebias = 1'b1;
end
end
always_comb begin
stage_open = 1'b0;
if (!stage_valid || stage_to_prebias) begin
stage_open = 1'b1;
end
end
always_comb begin
s2_to_stage = 1'b0;
if (s2_valid && stage_open) begin
s2_to_stage = 1'b1;
end
end
always_comb begin
s2_open = 1'b0;
if (!s2_valid || s2_to_stage) begin
s2_open = 1'b1;
end
end
always_comb begin
s1_to_s2 = 1'b0;
if (s1_valid && s2_open) begin
s1_to_s2 = 1'b1;
end
end
always_comb begin
s1_open = 1'b0;
if (!s1_valid || s1_to_s2) begin
s1_open = 1'b1;
end
end
always_comb begin
s0_to_s1 = 1'b0;
if (s0_valid && s1_open) begin
s0_to_s1 = 1'b1;
end
end
always_comb begin
s0_open = 1'b0;
if (!s0_valid || s0_to_s1) begin
s0_open = 1'b1;
end
end
always_comb begin
accept_update = 1'b0;
if (update_valid && update_ready) begin
accept_update = 1'b1;
end
end
always_comb begin
result_direct_from_bias = 1'b0;
if (bias_to_result && !result_valid) begin
result_direct_from_bias = 1'b1;
end
end
always_comb begin
result_store_next = 1'b0;
if (bias_to_result && result_valid) begin
result_store_next = 1'b1;
end
end
always_comb begin
result_promote_next = 1'b0;
if (result_next_valid && !result_valid) begin
result_promote_next = 1'b1;
end
end
always_comb begin
update_ready = 1'b0;
if (!update_next_valid) begin
update_ready = 1'b1;
end
end
always_comb begin
update_load_input = 1'b0;
if (accept_update && (!s0_valid || s0_to_s1)) begin
update_load_input = 1'b1;
end
end
always_comb begin
update_store_next = 1'b0;
if (accept_update && s0_valid && !s0_to_s1) begin
update_store_next = 1'b1;
end
end
always_comb begin
s0_promote_next = 1'b0;
if (s0_to_s1 && update_next_valid) begin
s0_promote_next = 1'b1;
end
end
always_ff @(posedge clk) begin
if (rst) begin
s0_valid <= 1'b0;
s0_A_in <= 32'd0;
s0_B_in <= 32'sd0;
s0_C_in <= 9'sd0;
s0_N_in <= 16'd0;
s0_Errval <= 32'sd0;
s0_context_index <= 9'd0;
s0_strip_last_pixel <= 1'b0;
s0_qbpp <= 5'd0;
s0_LIMIT <= 7'd0;
s0_NEAR <= 6'd0;
s0_RESET <= 16'd0;
update_next_valid <= 1'b0;
update_next_A_in <= 32'd0;
update_next_B_in <= 32'sd0;
update_next_C_in <= 9'sd0;
update_next_N_in <= 16'd0;
update_next_Errval <= 32'sd0;
update_next_context_index <= 9'd0;
update_next_strip_last_pixel <= 1'b0;
update_next_qbpp <= 5'd0;
update_next_LIMIT <= 7'd0;
update_next_NEAR <= 6'd0;
update_next_RESET <= 16'd0;
s1_valid <= 1'b0;
s1_A_accum <= 32'd0;
s1_B_in <= 32'sd0;
s1_C_in <= 9'sd0;
s1_N_in <= 16'd0;
s1_N_halved_plus_one <= 16'd0;
s1_RESET <= 16'd0;
s1_k <= 5'd0;
s1_Errval <= 32'sd0;
s1_context_index <= 9'd0;
s1_strip_last_pixel <= 1'b0;
s1_qbpp <= 5'd0;
s1_LIMIT <= 7'd0;
s1_map_invert <= 1'b0;
s1_Errval_ext <= 33'sd0;
s1_near_scale <= 8'sd1;
s2_valid <= 1'b0;
s2_A_accum <= 32'd0;
s2_B_in <= 32'sd0;
s2_B_delta <= 41'sd0;
s2_C_in <= 9'sd0;
s2_N_in <= 16'd0;
s2_N_halved_plus_one <= 16'd0;
s2_RESET <= 16'd0;
s2_k <= 5'd0;
s2_Errval <= 32'sd0;
s2_context_index <= 9'd0;
s2_strip_last_pixel <= 1'b0;
s2_qbpp <= 5'd0;
s2_LIMIT <= 7'd0;
s2_map_invert <= 1'b0;
stage_valid <= 1'b0;
stage_A_accum <= 32'd0;
stage_B_accum <= 41'sd0;
stage_C_in <= 9'sd0;
stage_N_in <= 16'd0;
stage_N_halved_plus_one <= 16'd0;
stage_RESET <= 16'd0;
stage_k <= 5'd0;
stage_Errval <= 32'sd0;
stage_context_index <= 9'd0;
stage_strip_last_pixel <= 1'b0;
stage_qbpp <= 5'd0;
stage_LIMIT <= 7'd0;
stage_map_invert <= 1'b0;
prebias_valid <= 1'b0;
prebias_A_after_reset <= 32'd0;
prebias_B_after_reset <= 41'sd0;
prebias_C_in <= 9'sd0;
prebias_N_after_increment <= 16'd0;
prebias_k <= 5'd0;
prebias_Errval <= 32'sd0;
prebias_context_index <= 9'd0;
prebias_strip_last_pixel <= 1'b0;
prebias_qbpp <= 5'd0;
prebias_LIMIT <= 7'd0;
prebias_map_invert <= 1'b0;
bias_valid <= 1'b0;
bias_A_after_reset <= 32'd0;
bias_B_after_bias <= 32'sd0;
bias_C_after_bias <= 9'sd0;
bias_N_after_increment <= 16'd0;
bias_k <= 5'd0;
bias_Errval <= 32'sd0;
bias_context_index <= 9'd0;
bias_strip_last_pixel <= 1'b0;
bias_qbpp <= 5'd0;
bias_LIMIT <= 7'd0;
bias_map_invert <= 1'b0;
result_valid <= 1'b0;
result_next_valid <= 1'b0;
result_next_k <= 5'd0;
result_next_Errval_out <= 32'sd0;
result_next_context_index_out <= 9'd0;
result_next_strip_last_pixel_out <= 1'b0;
result_next_qbpp_out <= 5'd0;
result_next_LIMIT_out <= 7'd0;
result_next_map_invert <= 1'b0;
result_next_A_out <= 32'd0;
result_next_B_out <= 32'sd0;
result_next_C_out <= 9'sd0;
result_next_N_out <= 16'd0;
k <= 5'd0;
Errval_out <= 32'sd0;
context_index_out <= 9'd0;
strip_last_pixel_out <= 1'b0;
qbpp_out <= 5'd0;
LIMIT_out <= 7'd0;
map_invert <= 1'b0;
A_out <= 32'd0;
B_out <= 32'sd0;
C_out <= 9'sd0;
N_out <= 16'd0;
end else begin
if (result_accept && !result_promote_next && !result_direct_from_bias) begin
result_valid <= 1'b0;
end
if (result_promote_next) begin
result_valid <= 1'b1;
k <= result_next_k;
Errval_out <= result_next_Errval_out;
context_index_out <= result_next_context_index_out;
strip_last_pixel_out <= result_next_strip_last_pixel_out;
qbpp_out <= result_next_qbpp_out;
LIMIT_out <= result_next_LIMIT_out;
map_invert <= result_next_map_invert;
A_out <= result_next_A_out;
B_out <= result_next_B_out;
C_out <= result_next_C_out;
N_out <= result_next_N_out;
result_next_valid <= 1'b0;
end
if (result_direct_from_bias) begin
result_valid <= 1'b1;
k <= bias_k;
Errval_out <= bias_Errval;
context_index_out <= bias_context_index;
strip_last_pixel_out <= bias_strip_last_pixel;
qbpp_out <= bias_qbpp;
LIMIT_out <= bias_LIMIT;
map_invert <= bias_map_invert;
A_out <= bias_A_after_reset;
B_out <= bias_B_after_bias;
C_out <= bias_C_after_bias;
N_out <= bias_N_after_increment;
end
if (result_store_next) begin
result_next_valid <= 1'b1;
result_next_k <= bias_k;
result_next_Errval_out <= bias_Errval;
result_next_context_index_out <= bias_context_index;
result_next_strip_last_pixel_out <= bias_strip_last_pixel;
result_next_qbpp_out <= bias_qbpp;
result_next_LIMIT_out <= bias_LIMIT;
result_next_map_invert <= bias_map_invert;
result_next_A_out <= bias_A_after_reset;
result_next_B_out <= bias_B_after_bias;
result_next_C_out <= bias_C_after_bias;
result_next_N_out <= bias_N_after_increment;
end
if (prebias_to_bias) begin
bias_valid <= 1'b1;
bias_A_after_reset <= prebias_A_after_reset;
bias_B_after_bias <= bias_stage_B_after_bias[31:0];
bias_C_after_bias <= bias_stage_C_after_bias;
bias_N_after_increment <= prebias_N_after_increment;
bias_k <= prebias_k;
bias_Errval <= prebias_Errval;
bias_context_index <= prebias_context_index;
bias_strip_last_pixel <= prebias_strip_last_pixel;
bias_qbpp <= prebias_qbpp;
bias_LIMIT <= prebias_LIMIT;
bias_map_invert <= prebias_map_invert;
end else if (bias_to_result) begin
bias_valid <= 1'b0;
end
if (stage_to_prebias) begin
prebias_valid <= 1'b1;
prebias_A_after_reset <= A_after_reset;
prebias_B_after_reset <= B_after_reset;
prebias_C_in <= stage_C_in;
prebias_N_after_increment <= N_after_increment;
prebias_k <= stage_k;
prebias_Errval <= stage_Errval;
prebias_context_index <= stage_context_index;
prebias_strip_last_pixel <= stage_strip_last_pixel;
prebias_qbpp <= stage_qbpp;
prebias_LIMIT <= stage_LIMIT;
prebias_map_invert <= stage_map_invert;
end else if (prebias_to_bias) begin
prebias_valid <= 1'b0;
end
if (s2_to_stage) begin
stage_valid <= 1'b1;
stage_A_accum <= s2_A_accum;
stage_B_accum <= B_accum_next;
stage_C_in <= s2_C_in;
stage_N_in <= s2_N_in;
stage_N_halved_plus_one <= s2_N_halved_plus_one;
stage_RESET <= s2_RESET;
stage_k <= s2_k;
stage_Errval <= s2_Errval;
stage_context_index <= s2_context_index;
stage_strip_last_pixel <= s2_strip_last_pixel;
stage_qbpp <= s2_qbpp;
stage_LIMIT <= s2_LIMIT;
stage_map_invert <= s2_map_invert;
end else if (stage_to_prebias) begin
stage_valid <= 1'b0;
end
if (s1_to_s2) begin
s2_valid <= 1'b1;
s2_A_accum <= s1_A_accum;
s2_B_in <= s1_B_in;
s2_B_delta <= B_delta;
s2_C_in <= s1_C_in;
s2_N_in <= s1_N_in;
s2_N_halved_plus_one <= s1_N_halved_plus_one;
s2_RESET <= s1_RESET;
s2_k <= s1_k;
s2_Errval <= s1_Errval;
s2_context_index <= s1_context_index;
s2_strip_last_pixel <= s1_strip_last_pixel;
s2_qbpp <= s1_qbpp;
s2_LIMIT <= s1_LIMIT;
s2_map_invert <= s1_map_invert;
end else if (s2_to_stage) begin
s2_valid <= 1'b0;
end
if (s0_to_s1) begin
s1_valid <= 1'b1;
s1_A_accum <= A_accum_next;
s1_B_in <= s0_B_in;
s1_C_in <= s0_C_in;
s1_N_in <= s0_N_in;
s1_N_halved_plus_one <= N_halved_plus_one_next;
s1_RESET <= s0_RESET;
s1_k <= k_next;
s1_Errval <= s0_Errval;
s1_context_index <= s0_context_index;
s1_strip_last_pixel <= s0_strip_last_pixel;
s1_qbpp <= s0_qbpp;
s1_LIMIT <= s0_LIMIT;
s1_map_invert <= map_invert_next;
s1_Errval_ext <= Errval_ext;
s1_near_scale <= near_scale;
end else if (s1_to_s2) begin
s1_valid <= 1'b0;
end
if (s0_promote_next) begin
s0_valid <= 1'b1;
s0_A_in <= update_next_A_in;
s0_B_in <= update_next_B_in;
s0_C_in <= update_next_C_in;
s0_N_in <= update_next_N_in;
s0_Errval <= update_next_Errval;
s0_context_index <= update_next_context_index;
s0_strip_last_pixel <= update_next_strip_last_pixel;
s0_qbpp <= update_next_qbpp;
s0_LIMIT <= update_next_LIMIT;
s0_NEAR <= update_next_NEAR;
s0_RESET <= update_next_RESET;
update_next_valid <= 1'b0;
end else if (s0_to_s1) begin
s0_valid <= 1'b0;
end
if (update_load_input) begin
s0_valid <= 1'b1;
s0_A_in <= A_in;
s0_B_in <= B_in;
s0_C_in <= C_in;
s0_N_in <= N_in;
s0_Errval <= Errval;
s0_context_index <= context_index_in;
s0_strip_last_pixel <= strip_last_pixel_in;
s0_qbpp <= qbpp_in;
s0_LIMIT <= LIMIT_in;
s0_NEAR <= NEAR;
s0_RESET <= RESET;
end
if (update_store_next) begin
update_next_valid <= 1'b1;
update_next_A_in <= A_in;
update_next_B_in <= B_in;
update_next_C_in <= C_in;
update_next_N_in <= N_in;
update_next_Errval <= Errval;
update_next_context_index <= context_index_in;
update_next_strip_last_pixel <= strip_last_pixel_in;
update_next_qbpp <= qbpp_in;
update_next_LIMIT <= LIMIT_in;
update_next_NEAR <= NEAR;
update_next_RESET <= RESET;
end
end
end
endmodule
`default_nettype wire

View File

@@ -0,0 +1,144 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.5 prediction error encoding, Annex G.2 variables
// Figure : N/A
// Table : N/A
// Pseudocode : Map signed Errval into non-negative MErrval
// Trace : docs/jls_traceability.md#golomb-rice-encoding
// Example : Errval=-3 maps to MErrval=5; Errval=3 maps to MErrval=6.
//
// Registered error mapper. The upstream regular-mode stage computes Errval,
// k, LIMIT, qbpp, and whether the context correction inverts Errval before
// mapping. This module only performs the standard signed-to-unsigned mapping
// and forwards coding parameters to jls_golomb_encoder.
`default_nettype none
module jls_error_mapper (
    // Main 250 MHz clock.
    input var logic clk,
    // Synchronous active-high reset.
    input var logic rst,
    // Upstream prediction-error event is valid.
    input var logic err_valid,
    // High when the output register can take a new event this cycle.
    output logic err_ready,
    // Signed prediction error to be mapped.
    input var logic signed [31:0] Errval,
    // When high, Errval is bitwise inverted (~Errval) before mapping.
    input var logic map_invert,
    // Golomb parameter k, forwarded unchanged.
    input var logic [4:0] k,
    // LIMIT parameter, forwarded unchanged.
    input var logic [6:0] limit,
    // qbpp parameter, forwarded unchanged.
    input var logic [4:0] qbpp,
    // Strip-boundary flag, forwarded unchanged.
    input var logic strip_last_pixel,
    // Registered mapped-error event is valid.
    output logic mapped_valid,
    // Downstream ready for the registered mapped-error event.
    input var logic mapped_ready,
    // Non-negative mapped error value.
    output logic [31:0] MErrval,
    // Registered copies of the coding parameters.
    output logic [4:0] mapped_k,
    output logic [6:0] mapped_limit,
    output logic [4:0] mapped_qbpp,
    // Registered copy of the strip-boundary flag.
    output logic mapped_strip_last_pixel
);
  // Error value after the optional inversion.
  logic signed [31:0] err_after_invert;
  // Sign-extended by one bit so that negating the most negative 32-bit value
  // still fits.
  logic signed [32:0] err_wide;
  logic               err_is_negative;
  logic signed [32:0] err_magnitude;
  // 2*|e| for e >= 0, 2*|e| - 1 for e < 0, before truncation to 32 bits.
  logic        [32:0] mapped_wide;
  // Handshake helper: an input is taken when it is valid and the slot is free.
  logic               take_input;

  assign err_after_invert = map_invert ? ~Errval : Errval;
  assign err_wide         = {err_after_invert[31], err_after_invert};
  assign err_is_negative  = err_wide[32];
  assign err_magnitude    = err_is_negative ? -err_wide : err_wide;
  assign mapped_wide      = {err_magnitude[31:0], 1'b0} - {32'd0, err_is_negative};

  // The single output register is free when empty or when its current
  // content is being consumed in this cycle.
  assign err_ready  = !mapped_valid || mapped_ready;
  assign take_input = err_valid && err_ready;

  always_ff @(posedge clk) begin
    if (rst) begin
      mapped_valid            <= 1'b0;
      MErrval                 <= 32'd0;
      mapped_k                <= 5'd0;
      mapped_limit            <= 7'd0;
      mapped_qbpp             <= 5'd0;
      mapped_strip_last_pixel <= 1'b0;
    end else if (take_input) begin
      // Load (or replace a just-consumed) event.
      mapped_valid            <= 1'b1;
      MErrval                 <= mapped_wide[31:0];
      mapped_k                <= k;
      mapped_limit            <= limit;
      mapped_qbpp             <= qbpp;
      mapped_strip_last_pixel <= strip_last_pixel;
    end else if (mapped_valid && mapped_ready) begin
      // Event consumed with nothing to replace it; payload registers hold.
      mapped_valid <= 1'b0;
    end
  end
endmodule
`default_nettype wire

View File

@@ -0,0 +1,371 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.5 prediction error encoding, Annex G.2 variables
// Figure : N/A
// Table : N/A
// Pseudocode : Encode MErrval using k, LIMIT, and qbpp
// Trace : docs/jls_traceability.md#golomb-rice-encoding
// Example : MErrval=5, k=1 emits bits 0,0,1,1.
//
// Golomb code-event generator. This module starts from the standard mapped
// error value MErrval and Golomb parameter k. Earlier pipeline stages are
// responsible for computing Errval, MErrval, k, LIMIT, and qbpp from the
// JPEG-LS context variables.
`default_nettype none
// Limited-length Golomb code-event generator.
//
// For each accepted MErrval the module emits a unary prefix (zeros terminated
// by a single one bit) followed by an optional binary suffix, each as a
// left-aligned code event:
//   - If (MErrval >> k) < LIMIT - qbpp - 1: the prefix is (MErrval >> k) + 1
//     bits long and the suffix is the low k bits of MErrval.
//   - Otherwise: the prefix is LIMIT - qbpp bits long and the suffix is the
//     low qbpp bits of MErrval - 1.
// A prefix longer than MAX_CODE_BITS is split into several events; only the
// final prefix event carries the terminating one bit. mapped_done pulses for
// one cycle after the last event of a mapped error has been accepted.
module jls_golomb_encoder #(
// Maximum code bits sent to jls_bit_packer in one event.
// NOTE(review): this value is truncated to 7 bits (MAX_CODE_BITS_VALUE) and
// the suffix (up to 31 bits) is emitted as a single event, so values outside
// roughly 31..127 look unsupported -- confirm before changing the default.
parameter int MAX_CODE_BITS = 64
) (
// Main 250 MHz clock.
input var logic clk,
// Synchronous active-high reset.
input var logic rst,
// MErrval input event is valid.
input var logic mapped_valid,
// This encoder can accept MErrval and coding parameters.
output logic mapped_ready,
// JPEG-LS mapped error value, named after the standard pseudocode variable.
input var logic [31:0] MErrval,
// JPEG-LS Golomb parameter k.
input var logic [4:0] k,
// JPEG-LS LIMIT parameter for the current coding mode.
input var logic [6:0] limit,
// JPEG-LS qbpp parameter for the current coding mode.
input var logic [4:0] qbpp,
// Last pixel of the current strip frame.
input var logic mapped_strip_last_pixel,
// Generated left-aligned code event for jls_bit_packer.
output logic code_valid,
// jls_bit_packer accepted the current code event.
input var logic code_ready,
// Left-aligned code bits. The first bit is code_bits[MAX_CODE_BITS-1].
output logic [MAX_CODE_BITS-1:0] code_bits,
// Number of valid bits in code_bits.
output logic [6:0] code_bit_count,
// Encoder completed the current MErrval event.
output logic mapped_done,
// Encoder completed the last pixel of the current strip frame.
output logic mapped_last_done
);
// State machine for emitting prefix chunks and optional suffix bits.
// Per-event sequence: IDLE -> PREP -> SELECT -> SUFFIX_PREP -> PREFIX ->
// (SUFFIX when suffix_bit_count != 0) -> DONE -> IDLE.
typedef enum logic [2:0] {
STATE_IDLE = 3'd0,
STATE_PREP = 3'd1,
STATE_SUFFIX_PREP = 3'd2,
STATE_PREFIX = 3'd3,
STATE_SUFFIX = 3'd4,
STATE_DONE = 3'd5,
STATE_SELECT = 3'd6
} golomb_state_e;
// Maximum code event size as a runtime-comparable constant.
localparam logic [6:0] MAX_CODE_BITS_VALUE = MAX_CODE_BITS;
// Current state.
golomb_state_e state;
// Latched coding parameters for the active mapped-error event.
// prefix_remaining counts prefix bits (zeros plus the terminating one) that
// have not yet been accepted downstream.
logic [6:0] prefix_remaining;
logic [31:0] suffix_bits;
logic [6:0] suffix_bit_count;
logic active_strip_last_pixel;
logic [31:0] latched_MErrval;
logic [31:0] latched_MErrval_minus_one;
logic [4:0] latched_k;
logic [6:0] latched_limit;
logic [4:0] latched_qbpp;
logic latched_strip_last_pixel;
logic [31:0] suffix_base_bits;
logic [31:0] high_bits_latched;
logic [6:0] prefix_threshold_latched;
logic [6:0] normal_prefix_count_latched;
logic [6:0] limited_prefix_count_latched;
// Pending code-event bookkeeping. Counters are updated when code_ready
// accepts the event.
logic pending_prefix_event;
logic pending_prefix_last;
logic pending_suffix_event;
logic [6:0] pending_prefix_count;
// Combinational input analysis.
logic accept_mapped;
logic [31:0] high_bits;
logic [6:0] prefix_threshold;
logic use_regular_golomb_path;
logic [6:0] normal_prefix_count;
logic [6:0] limited_prefix_count;
logic [6:0] selected_prefix_count;
logic [6:0] selected_suffix_count;
logic high_bits_upper_nonzero;
logic [31:0] suffix_mask;
logic [31:0] selected_suffix_bits;
// Combinational code-event builders.
logic code_slot_open;
logic [6:0] prefix_emit_count;
logic prefix_emit_is_last;
logic [MAX_CODE_BITS-1:0] prefix_event_bits;
logic [MAX_CODE_BITS-1:0] suffix_event_bits;
// Loop index declared outside procedural blocks per project coding style.
integer suffix_bit_index;
// A new mapped error is accepted only while idle with no code event pending,
// so exactly one mapped error is in flight at a time.
always_comb begin
mapped_ready = 1'b0;
if (state == STATE_IDLE && !code_valid) begin
mapped_ready = 1'b1;
end
end
// Input handshake fires this cycle.
always_comb begin
accept_mapped = 1'b0;
if (mapped_valid && mapped_ready) begin
accept_mapped = 1'b1;
end
end
// Output register is empty or its event is being accepted this cycle.
always_comb begin
code_slot_open = 1'b0;
if (!code_valid || code_ready) begin
code_slot_open = 1'b1;
end
end
// Unary part of the Golomb code: MErrval with the low k bits removed.
always_comb begin
high_bits = latched_MErrval >> latched_k;
end
// Escape threshold LIMIT - qbpp - 1, clamped to zero when LIMIT is not larger
// than qbpp + 1 so the 7-bit subtraction cannot wrap.
always_comb begin
prefix_threshold = 7'd0;
if (latched_limit > ({2'b00, latched_qbpp} + 7'd1)) begin
prefix_threshold = latched_limit - {2'b00, latched_qbpp} - 7'd1;
end
end
// Any set bit above bit 6 means high_bits exceeds every 7-bit threshold.
always_comb begin
high_bits_upper_nonzero = 1'b0;
if (high_bits_latched[31:7] != 25'd0) begin
high_bits_upper_nonzero = 1'b1;
end
end
// Regular (non-escape) coding applies when the registered high_bits value is
// strictly below the registered threshold.
always_comb begin
use_regular_golomb_path = 1'b0;
if (!high_bits_upper_nonzero &&
high_bits_latched[6:0] < prefix_threshold_latched) begin
use_regular_golomb_path = 1'b1;
end
end
// Regular prefix length: high_bits zeros plus the terminating one bit. Only
// the low 7 bits are used; the result is selected only on the regular path.
always_comb begin
normal_prefix_count = high_bits[6:0] + 7'd1;
end
// Escape prefix length LIMIT - qbpp, with a floor of one bit when LIMIT is
// not larger than qbpp.
always_comb begin
limited_prefix_count = 7'd1;
if (latched_limit > {2'b00, latched_qbpp}) begin
limited_prefix_count = latched_limit - {2'b00, latched_qbpp};
end
end
// Choose prefix and suffix lengths: escape path by default (qbpp suffix
// bits), regular path when selected (k suffix bits).
always_comb begin
selected_prefix_count = limited_prefix_count_latched;
selected_suffix_count = {2'b00, latched_qbpp};
if (use_regular_golomb_path) begin
selected_prefix_count = normal_prefix_count_latched;
selected_suffix_count = {2'b00, latched_k};
end
end
// Mask with the low suffix_bit_count bits set. Only suffix_bit_count[4:0] is
// used for the shift, which covers the 0..31 range produced by k and qbpp.
always_comb begin
suffix_mask = 32'd0;
if (suffix_bit_count != 7'd0) begin
suffix_mask = (32'd1 << suffix_bit_count[4:0]) - 32'd1;
end
end
// Keep only the suffix bits of the selected base value.
always_comb begin
selected_suffix_bits = suffix_base_bits & suffix_mask;
end
// Prefix chunking: emit everything that remains if it fits in one event,
// otherwise emit a full event of MAX_CODE_BITS_VALUE bits and continue.
always_comb begin
prefix_emit_count = prefix_remaining;
prefix_emit_is_last = 1'b1;
if (prefix_remaining > MAX_CODE_BITS_VALUE) begin
prefix_emit_count = MAX_CODE_BITS_VALUE;
prefix_emit_is_last = 1'b0;
end
end
// Prefix event payload: all zeros, with the terminating one bit placed at the
// last valid (left-aligned) position of the final chunk only.
always_comb begin
prefix_event_bits = {MAX_CODE_BITS{1'b0}};
if (prefix_emit_is_last && prefix_emit_count != 7'd0) begin
prefix_event_bits[MAX_CODE_BITS_VALUE - prefix_emit_count] = 1'b1;
end
end
// Suffix event payload: left-align the suffix so that its most significant
// bit (index suffix_bit_count - 1) lands in code_bits[MAX_CODE_BITS-1].
always_comb begin
suffix_event_bits = {MAX_CODE_BITS{1'b0}};
for (suffix_bit_index = 0; suffix_bit_index < MAX_CODE_BITS; suffix_bit_index = suffix_bit_index + 1) begin
if (suffix_bit_index < suffix_bit_count) begin
suffix_event_bits[MAX_CODE_BITS - 1 - suffix_bit_index] =
suffix_bits[suffix_bit_count - 7'd1 - suffix_bit_index[6:0]];
end
end
end
// Sequential control. Statement order matters: later nonblocking assignments
// to the same register override earlier ones within a cycle.
always_ff @(posedge clk) begin
if (rst) begin
state <= STATE_IDLE;
prefix_remaining <= 7'd0;
suffix_bits <= 32'd0;
suffix_bit_count <= 7'd0;
active_strip_last_pixel <= 1'b0;
latched_MErrval <= 32'd0;
latched_MErrval_minus_one <= 32'd0;
latched_k <= 5'd0;
latched_limit <= 7'd0;
latched_qbpp <= 5'd0;
latched_strip_last_pixel <= 1'b0;
suffix_base_bits <= 32'd0;
high_bits_latched <= 32'd0;
prefix_threshold_latched <= 7'd0;
normal_prefix_count_latched <= 7'd0;
limited_prefix_count_latched <= 7'd0;
pending_prefix_event <= 1'b0;
pending_prefix_last <= 1'b0;
pending_suffix_event <= 1'b0;
pending_prefix_count <= 7'd0;
code_valid <= 1'b0;
code_bits <= {MAX_CODE_BITS{1'b0}};
code_bit_count <= 7'd0;
mapped_done <= 1'b0;
mapped_last_done <= 1'b0;
end else begin
// Completion flags are single-cycle pulses; STATE_DONE overrides below.
mapped_done <= 1'b0;
mapped_last_done <= 1'b0;
// Output handshake: the pending event was accepted. Clear the output
// register and apply the bookkeeping recorded when the event was issued.
if (code_valid && code_ready) begin
code_valid <= 1'b0;
code_bits <= {MAX_CODE_BITS{1'b0}};
code_bit_count <= 7'd0;
if (pending_prefix_event) begin
prefix_remaining <= prefix_remaining - pending_prefix_count;
// After the final prefix chunk, continue with the suffix if there is
// one; a non-final chunk leaves the state in STATE_PREFIX.
if (pending_prefix_last) begin
if (suffix_bit_count != 7'd0) begin
state <= STATE_SUFFIX;
end else begin
state <= STATE_DONE;
end
end
end
if (pending_suffix_event) begin
state <= STATE_DONE;
end
pending_prefix_event <= 1'b0;
pending_prefix_last <= 1'b0;
pending_suffix_event <= 1'b0;
pending_prefix_count <= 7'd0;
end
// Input handshake: latch the event. MErrval - 1 is precomputed here for
// the escape-path suffix.
if (accept_mapped) begin
latched_MErrval <= MErrval;
latched_MErrval_minus_one <= MErrval - 32'd1;
latched_k <= k;
latched_limit <= limit;
latched_qbpp <= qbpp;
latched_strip_last_pixel <= mapped_strip_last_pixel;
state <= STATE_PREP;
// The FSM advances only while no code event is pending (the condition
// reduces to !code_valid), so it never overlaps the handshake block above.
end else if (code_slot_open && !code_valid) begin
case (state)
STATE_PREP: begin
// Stage note : The standard Annex G.2 Golomb selection is
// split across STATE_PREP and STATE_SELECT. This stage registers
// MErrval>>k, LIMIT/qbpp threshold, and candidate prefix lengths
// before the compare/mux that writes prefix_remaining.
high_bits_latched <= high_bits;
prefix_threshold_latched <= prefix_threshold;
normal_prefix_count_latched <= normal_prefix_count;
limited_prefix_count_latched <= limited_prefix_count;
active_strip_last_pixel <= latched_strip_last_pixel;
state <= STATE_SELECT;
end
STATE_SELECT: begin
// Stage note : Use the registered high_bits value. The standard
// condition high_bits < LIMIT-qbpp-1 is implemented as an upper-bit
// zero test plus a 7-bit compare, avoiding a long 32-bit carry path.
prefix_remaining <= selected_prefix_count;
suffix_bit_count <= selected_suffix_count;
// Escape path suffix source is MErrval - 1; the regular path overrides
// it with MErrval.
suffix_base_bits <= latched_MErrval_minus_one;
if (use_regular_golomb_path) begin
suffix_base_bits <= latched_MErrval;
end
state <= STATE_SUFFIX_PREP;
end
STATE_SUFFIX_PREP: begin
// suffix_bit_count and suffix_base_bits are now registered, so the
// masked suffix can be captured.
suffix_bits <= selected_suffix_bits;
state <= STATE_PREFIX;
end
STATE_PREFIX: begin
// Issue the next prefix chunk and record what to subtract once it is
// accepted. The state changes in the handshake block above.
// NOTE(review): with prefix_remaining == 0 there is no transition out
// of this state; both selectable prefix counts are at least 1, so this
// appears unreachable -- confirm.
if (prefix_remaining != 7'd0) begin
code_valid <= 1'b1;
code_bits <= prefix_event_bits;
code_bit_count <= prefix_emit_count;
pending_prefix_event <= 1'b1;
pending_prefix_last <= prefix_emit_is_last;
pending_prefix_count <= prefix_emit_count;
end
end
STATE_SUFFIX: begin
// Issue the whole suffix as one event; skip straight to DONE when
// there are no suffix bits.
if (suffix_bit_count != 7'd0) begin
code_valid <= 1'b1;
code_bits <= suffix_event_bits;
code_bit_count <= suffix_bit_count;
pending_suffix_event <= 1'b1;
end else begin
state <= STATE_DONE;
end
end
STATE_DONE: begin
// Report completion and the strip-last flag, then return to idle.
mapped_done <= 1'b1;
mapped_last_done <= active_strip_last_pixel;
active_strip_last_pixel <= 1'b0;
state <= STATE_IDLE;
end
default: begin
// STATE_IDLE and unused encodings.
state <= STATE_IDLE;
end
endcase
end
end
end
endmodule
`default_nettype wire

View File

@@ -0,0 +1,442 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex C.2.2 frame header, C.2.3 scan header, C.2.4.1 LSE
// Figure : N/A
// Table : Table C.1 preset parameters, Table C.2 RESET, Table C.3 defaults
// Pseudocode : JPEG-LS marker segment emission before and after one scan
// Trace : docs/jls_traceability.md#jls-header-markers
// Example : For PIX_WIDTH=8 and width=32, SOF55 emits P=8, Y=16, X=32.
//
// Header writer for one standalone grayscale JPEG-LS strip frame. A strip
// start command emits SOI, SOF55, LSE preset coding parameters, and SOS. A
// strip finish command emits EOI after the entropy payload has been flushed by
// the bit packer.
`default_nettype none
module jls_header_writer #(
    // Grayscale sample precision in bits, emitted as the SOF55 precision byte.
    // Legal values: 8, 10, 12, 14, 16.
    parameter int PIX_WIDTH = 16
) (
    // Main 250 MHz clock.
    input var logic clk,
    // Synchronous active-high reset.
    input var logic rst,
    // Request to emit the header of one standalone strip frame.
    input var logic strip_start_valid,
    // High while idle: a start request is accepted in the same cycle.
    output logic strip_start_ready,
    // Set with the start request for the first strip of an original image.
    input var logic original_image_first_strip,
    // Frame width written to SOF55.X.
    input var logic [12:0] strip_width,
    // Frame height written to SOF55.Y.
    input var logic [12:0] strip_height,
    // NEAR value written to the SOS segment.
    input var logic [5:0] near,
    // LSE preset coding parameter MAXVAL.
    input var logic [15:0] preset_maxval,
    // LSE preset coding parameter T1.
    input var logic [15:0] preset_t1,
    // LSE preset coding parameter T2.
    input var logic [15:0] preset_t2,
    // LSE preset coding parameter T3.
    input var logic [15:0] preset_t3,
    // LSE preset coding parameter RESET.
    input var logic [15:0] preset_reset,
    // Request to emit EOI after the entropy payload has been byte-flushed.
    input var logic strip_finish_valid,
    // High while idle and no start request is present (start has priority).
    output logic strip_finish_ready,
    // Marker byte is valid.
    output logic byte_valid,
    // Downstream byte buffer can accept the marker byte.
    input var logic byte_ready,
    // Marker byte in JPEG marker-stream order.
    output logic [7:0] byte_data,
    // High together with the first SOI byte of an original input image.
    output logic original_image_start,
    // One-cycle pulse after the last SOS byte has been accepted.
    output logic header_done,
    // One-cycle pulse after the second EOI byte has been accepted.
    output logic eoi_done
);
  import jls_common_pkg::*;

  // Header stream is SOI(2) + SOF55(13) + LSE(15) + SOS(10) = 40 bytes.
  localparam logic [5:0] HEADER_LAST_INDEX = 6'd39;
  // EOI stream is the two bytes FF D9.
  localparam logic [1:0] EOI_LAST_INDEX = 2'd1;

  typedef enum logic [1:0] {
    STATE_IDLE   = 2'd0,
    STATE_HEADER = 2'd1,
    STATE_EOI    = 2'd2
  } header_state_e;

  // Emission state and position of the byte currently held in byte_data.
  header_state_e state;
  header_state_e state_next;
  logic [5:0] header_index;
  logic [5:0] header_index_next;
  logic [1:0] eoi_index;
  logic [1:0] eoi_index_next;

  // Start-command fields captured when the start request is accepted.
  logic        latched_original_image_first_strip;
  logic [12:0] latched_strip_width;
  logic [12:0] latched_strip_height;
  logic [5:0]  latched_near;
  logic [15:0] latched_preset_maxval;
  logic [15:0] latched_preset_t1;
  logic [15:0] latched_preset_t2;
  logic [15:0] latched_preset_t3;
  logic [15:0] latched_preset_reset;

  // Handshake events.
  logic accept_start;
  logic accept_finish;
  logic output_fire;

  // Index and value of the byte that will be loaded into byte_data next.
  logic [5:0] header_byte_index;
  logic [1:0] eoi_byte_index;
  logic [7:0] header_byte;
  logic [7:0] eoi_byte;

  // Next values of the registered outputs.
  logic       byte_valid_next;
  logic [7:0] byte_data_next;
  logic       original_image_start_next;

  // Commands are taken only while idle; a simultaneous start request masks
  // the finish request.
  assign strip_start_ready  = (state == STATE_IDLE);
  assign strip_finish_ready = (state == STATE_IDLE) && !strip_start_valid;
  assign accept_start       = strip_start_valid && strip_start_ready;
  assign accept_finish      = strip_finish_valid && strip_finish_ready;
  assign output_fire        = byte_valid && byte_ready;

  // Look-ahead index: byte 0 when a command is accepted, the following byte
  // when the current one is accepted mid-sequence, otherwise unchanged.
  assign header_byte_index =
      accept_start ? 6'd0 :
      ((state == STATE_HEADER) && output_fire && (header_index != HEADER_LAST_INDEX))
          ? header_index + 6'd1
          : header_index;

  assign eoi_byte_index =
      accept_finish ? 2'd0 :
      ((state == STATE_EOI) && output_fire && (eoi_index != EOI_LAST_INDEX))
          ? eoi_index + 2'd1
          : eoi_index;

  // Header byte table. Bytes 0-5 are constants, so the latched command fields
  // (first used at byte 7) are already registered when they are read.
  always_comb begin
    case (header_byte_index)
      // SOI.
      6'd0:    header_byte = JLS_MARKER_PREFIX;
      6'd1:    header_byte = JLS_MARKER_SOI;
      // SOF55: length 11, precision, Y, X, one component (id 1, 1x1, Tq 0).
      6'd2:    header_byte = JLS_MARKER_PREFIX;
      6'd3:    header_byte = JLS_MARKER_SOF55;
      6'd4:    header_byte = 8'h00;
      6'd5:    header_byte = 8'h0B;
      6'd6:    header_byte = PIX_WIDTH[7:0];
      6'd7:    header_byte = {3'b000, latched_strip_height[12:8]};
      6'd8:    header_byte = latched_strip_height[7:0];
      6'd9:    header_byte = {3'b000, latched_strip_width[12:8]};
      6'd10:   header_byte = latched_strip_width[7:0];
      6'd11:   header_byte = 8'h01;
      6'd12:   header_byte = 8'h01;
      6'd13:   header_byte = 8'h11;
      6'd14:   header_byte = 8'h00;
      // LSE type 1: length 13, MAXVAL, T1, T2, T3, RESET (big-endian).
      6'd15:   header_byte = JLS_MARKER_PREFIX;
      6'd16:   header_byte = JLS_MARKER_LSE;
      6'd17:   header_byte = 8'h00;
      6'd18:   header_byte = 8'h0D;
      6'd19:   header_byte = 8'h01;
      6'd20:   header_byte = latched_preset_maxval[15:8];
      6'd21:   header_byte = latched_preset_maxval[7:0];
      6'd22:   header_byte = latched_preset_t1[15:8];
      6'd23:   header_byte = latched_preset_t1[7:0];
      6'd24:   header_byte = latched_preset_t2[15:8];
      6'd25:   header_byte = latched_preset_t2[7:0];
      6'd26:   header_byte = latched_preset_t3[15:8];
      6'd27:   header_byte = latched_preset_t3[7:0];
      6'd28:   header_byte = latched_preset_reset[15:8];
      6'd29:   header_byte = latched_preset_reset[7:0];
      // SOS: length 8, one component, no mapping table, NEAR, ILV=0.
      6'd30:   header_byte = JLS_MARKER_PREFIX;
      6'd31:   header_byte = JLS_MARKER_SOS;
      6'd32:   header_byte = 8'h00;
      6'd33:   header_byte = 8'h08;
      6'd34:   header_byte = 8'h01;
      6'd35:   header_byte = 8'h01;
      6'd36:   header_byte = 8'h00;
      6'd37:   header_byte = {2'b00, latched_near};
      6'd38:   header_byte = 8'h00;
      6'd39:   header_byte = 8'h00;
      default: header_byte = 8'h00;
    endcase
  end

  // EOI byte table.
  always_comb begin
    case (eoi_byte_index)
      2'd0:    eoi_byte = JLS_MARKER_PREFIX;
      2'd1:    eoi_byte = JLS_MARKER_EOI;
      default: eoi_byte = 8'h00;
    endcase
  end

  // Next-state logic for the FSM, the byte indexes and the output register.
  always_comb begin
    // Hold everything by default; this covers back-pressure in HEADER/EOI.
    state_next                = state;
    header_index_next         = header_index;
    eoi_index_next            = eoi_index;
    byte_valid_next           = byte_valid;
    byte_data_next            = byte_data;
    original_image_start_next = original_image_start;

    case (state)
      STATE_IDLE: begin
        // Idle keeps the indexes and outputs cleared unless a command loads
        // the first byte of its sequence.
        header_index_next         = 6'd0;
        eoi_index_next            = 2'd0;
        byte_valid_next           = 1'b0;
        byte_data_next            = 8'h00;
        original_image_start_next = 1'b0;
        if (accept_start) begin
          state_next                = STATE_HEADER;
          byte_valid_next           = 1'b1;
          byte_data_next            = header_byte;
          original_image_start_next = original_image_first_strip;
        end else if (accept_finish) begin
          state_next      = STATE_EOI;
          byte_valid_next = 1'b1;
          byte_data_next  = eoi_byte;
        end
      end

      STATE_HEADER: begin
        if (output_fire) begin
          if (header_index == HEADER_LAST_INDEX) begin
            // Last SOS byte accepted: release the output and go idle.
            state_next                = STATE_IDLE;
            header_index_next         = 6'd0;
            byte_valid_next           = 1'b0;
            byte_data_next            = 8'h00;
            original_image_start_next = 1'b0;
          end else begin
            // Load the following byte. The look-ahead index is
            // header_index + 1 here, so the sideband drops after byte 0.
            header_index_next         = header_index + 6'd1;
            byte_valid_next           = 1'b1;
            byte_data_next            = header_byte;
            original_image_start_next =
                (header_byte_index == 6'd0) && latched_original_image_first_strip;
          end
        end
      end

      STATE_EOI: begin
        if (output_fire) begin
          original_image_start_next = 1'b0;
          if (eoi_index == EOI_LAST_INDEX) begin
            state_next      = STATE_IDLE;
            eoi_index_next  = 2'd0;
            byte_valid_next = 1'b0;
            byte_data_next  = 8'h00;
          end else begin
            eoi_index_next  = eoi_index + 2'd1;
            byte_valid_next = 1'b1;
            byte_data_next  = eoi_byte;
          end
        end
      end

      default: begin
        // Unused state encoding: recover to idle with outputs cleared.
        state_next                = STATE_IDLE;
        header_index_next         = 6'd0;
        eoi_index_next            = 2'd0;
        byte_valid_next           = 1'b0;
        byte_data_next            = 8'h00;
        original_image_start_next = 1'b0;
      end
    endcase
  end

  always_ff @(posedge clk) begin
    if (rst) begin
      state                              <= STATE_IDLE;
      header_index                       <= 6'd0;
      eoi_index                          <= 2'd0;
      byte_valid                         <= 1'b0;
      byte_data                          <= 8'h00;
      original_image_start               <= 1'b0;
      header_done                        <= 1'b0;
      eoi_done                           <= 1'b0;
      latched_original_image_first_strip <= 1'b0;
      latched_strip_width                <= 13'd0;
      latched_strip_height               <= 13'd0;
      latched_near                       <= 6'd0;
      latched_preset_maxval              <= 16'd0;
      latched_preset_t1                  <= 16'd0;
      latched_preset_t2                  <= 16'd0;
      latched_preset_t3                  <= 16'd0;
      latched_preset_reset               <= 16'd0;
    end else begin
      state                <= state_next;
      header_index         <= header_index_next;
      eoi_index            <= eoi_index_next;
      byte_valid           <= byte_valid_next;
      byte_data            <= byte_data_next;
      original_image_start <= original_image_start_next;

      // Completion pulses: high for the one cycle after the final byte of
      // the respective sequence was accepted.
      header_done <= (state == STATE_HEADER) && output_fire &&
                     (header_index == HEADER_LAST_INDEX);
      eoi_done    <= (state == STATE_EOI) && output_fire &&
                     (eoi_index == EOI_LAST_INDEX);

      // Capture the command fields for the header bytes that follow.
      if (accept_start) begin
        latched_original_image_first_strip <= original_image_first_strip;
        latched_strip_width                <= strip_width;
        latched_strip_height               <= strip_height;
        latched_near                       <= near;
        latched_preset_maxval              <= preset_maxval;
        latched_preset_t1                  <= preset_t1;
        latched_preset_t2                  <= preset_t2;
        latched_preset_t3                  <= preset_t3;
        latched_preset_reset               <= preset_reset;
      end
    end
  end
endmodule
`default_nettype wire

View File

@@ -0,0 +1,354 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.8 Control procedure, Annex D.1-D.3 scan control
// Figure : N/A
// Table : N/A
// Pseudocode : Source image sample ordering before JPEG-LS encoding
// Example : See docs/jls_module_interfaces.md
//
// Input controller for the JPEG-LS encoder. This module aligns the standard
// synchronous FIFO read latency, waits for the original-image SOF sideband,
// samples runtime configuration at the SOF pixel, and emits one registered
// pixel event at a time for the downstream scan controller.
`default_nettype none
module jls_input_ctrl #(
    // Grayscale sample bit depth fixed at compile time (8, 10, 12, 14 or 16).
    parameter int PIX_WIDTH = 16,
    // Width substituted when the runtime dimensions fail validation.
    parameter int DEFAULT_PIC_COL = 6144,
    // Height substituted when the runtime dimensions fail validation.
    parameter int DEFAULT_PIC_ROW = 256,
    // Largest runtime width that passes validation.
    parameter int MAX_PIC_COL = 6144,
    // Largest runtime height that passes validation.
    parameter int MAX_PIC_ROW = 4096,
    // Rows of the original image grouped into one standalone strip frame.
    parameter int SCAN_ROWS = 16,
    // FIFO word width: nine bits (eight data + one SOF flag) per byte lane.
    parameter int IFIFO_DATA_WIDTH = ((PIX_WIDTH + 7) / 8) * 9
) (
    // Main 250 MHz clock.
    input var logic clk,
    // Synchronous, active-high reset.
    input var logic rst,

    // Requested image width; latched together with the SOF pixel.
    input var logic [12:0] cfg_pic_col,
    // Requested image height; latched together with the SOF pixel.
    input var logic [12:0] cfg_pic_row,
    // Requested compression-ratio selector; latched together with the SOF pixel.
    input var logic [3:0] ratio,

    // FIFO read clock; driven directly from clk.
    output logic ififo_rclk,
    // FIFO read strobe; the addressed word is consumed one clk later.
    output logic ififo_rd,
    // FIFO read data carrying the SOF flag and the grayscale sample.
    input var logic [IFIFO_DATA_WIDTH-1:0] ififo_rdata,
    // FIFO empty flag.
    input var logic ififo_empty,
    // FIFO almost-empty flag.
    input var logic ififo_alempty,

    // Downstream accepts the registered pixel event this cycle.
    input var logic pixel_ready,
    // Flow-control request that suppresses new FIFO reads.
    input var logic pause_req,

    // Registered pixel event is valid.
    output logic pixel_valid,
    // SOF flag of the FIFO word behind the current pixel event.
    output logic pixel_sof,
    // Sample value of the FIFO word behind the current pixel event.
    output logic [PIX_WIDTH-1:0] pixel_sample,
    // Zero-based column of the current pixel event.
    output logic [12:0] pixel_x,
    // Zero-based row of the current pixel event.
    output logic [12:0] pixel_y,
    // Current pixel opens a strip frame.
    output logic strip_first_pixel,
    // Current pixel closes a strip frame.
    output logic strip_last_pixel,
    // Current pixel opens an original image.
    output logic image_first_pixel,
    // Current pixel closes an original image.
    output logic image_last_pixel,

    // Image width in effect (validated runtime value or the default).
    output logic [12:0] active_pic_col,
    // Image height in effect (validated runtime value or the default).
    output logic [12:0] active_pic_row,
    // Ratio latched for the image in progress.
    output logic [3:0] active_ratio,
    // Set when the runtime dimensions passed validation at SOF.
    output logic active_cfg_valid,
    // Set from the SOF pixel until the last pixel of the image is registered.
    output logic image_active
);

    // Position of the SOF flag inside the FIFO word.
    localparam int SOF_BIT_INDEX = (PIX_WIDTH == 8) ? 8 : 17;

    // 13-bit constants used by the comparisons and register loads below.
    localparam logic [12:0] MIN_PIC_COL_VALUE     = 13'd16;
    localparam logic [12:0] MIN_PIC_ROW_VALUE     = 13'd16;
    localparam logic [12:0] DEFAULT_PIC_COL_VALUE = DEFAULT_PIC_COL;
    localparam logic [12:0] DEFAULT_PIC_ROW_VALUE = DEFAULT_PIC_ROW;
    localparam logic [12:0] MAX_PIC_COL_VALUE     = MAX_PIC_COL;
    localparam logic [12:0] MAX_PIC_ROW_VALUE     = MAX_PIC_ROW;
    localparam logic [12:0] SCAN_ROWS_VALUE       = SCAN_ROWS;
    localparam logic [12:0] SCAN_ROWS_LAST_VALUE  = SCAN_ROWS - 1;

    // High during the cycle in which the word requested one cycle ago is consumed.
    logic rd_pending;

    // Coordinates that the next in-frame FIFO word will receive.
    logic [12:0] x_count;
    logic [12:0] y_count;
    logic [12:0] strip_row_count;

    // Runtime dimension checks (combinational; only latched at SOF).
    logic        cfg_col_in_range;
    logic        cfg_row_in_range;
    logic        cfg_row_aligned;
    logic        cfg_dimension_valid;
    logic [12:0] col_to_latch;
    logic [12:0] row_to_latch;

    // Read pacing.
    logic fifo_has_data;
    logic out_reg_free;
    logic rd_fire;

    // Fields sliced out of the FIFO word.
    logic                 word_sof;
    logic [PIX_WIDTH-1:0] word_sample;

    // Boundary flags for the coordinate held in x_count/y_count/strip_row_count.
    logic [12:0] active_pic_col_last;
    logic [12:0] active_pic_row_last;
    logic        x_is_first;
    logic        x_is_last;
    logic        y_is_last;
    logic        strip_row_is_first;
    logic        strip_row_is_last;
    logic        at_strip_origin;

    assign ififo_rclk  = clk;
    assign ififo_rd    = rd_fire;
    assign word_sof    = ififo_rdata[SOF_BIT_INDEX];
    assign word_sample = ififo_rdata[PIX_WIDTH-1:0];

    // ------------------------------------------------------------------
    // Runtime dimension validation and default fallback.
    // ------------------------------------------------------------------
    always_comb begin
        if (cfg_pic_col >= MIN_PIC_COL_VALUE && cfg_pic_col <= MAX_PIC_COL_VALUE) begin
            cfg_col_in_range = 1'b1;
        end else begin
            cfg_col_in_range = 1'b0;
        end

        if (cfg_pic_row >= MIN_PIC_ROW_VALUE && cfg_pic_row <= MAX_PIC_ROW_VALUE) begin
            cfg_row_in_range = 1'b1;
        end else begin
            cfg_row_in_range = 1'b0;
        end

        // The height has to be a whole number of strips.
        if ((cfg_pic_row % SCAN_ROWS_VALUE) == 13'd0) begin
            cfg_row_aligned = 1'b1;
        end else begin
            cfg_row_aligned = 1'b0;
        end

        if (cfg_col_in_range && cfg_row_in_range && cfg_row_aligned) begin
            cfg_dimension_valid = 1'b1;
        end else begin
            cfg_dimension_valid = 1'b0;
        end

        // Width and height fall back together: one bad field rejects both.
        if (cfg_dimension_valid) begin
            col_to_latch = cfg_pic_col;
            row_to_latch = cfg_pic_row;
        end else begin
            col_to_latch = DEFAULT_PIC_COL_VALUE;
            row_to_latch = DEFAULT_PIC_ROW_VALUE;
        end
    end

    // ------------------------------------------------------------------
    // FIFO read pacing.
    // ------------------------------------------------------------------
    always_comb begin
        if (!ififo_alempty || !ififo_empty) begin
            fifo_has_data = 1'b1;
        end else begin
            fifo_has_data = 1'b0;
        end

        // The output register is free when empty or being drained this cycle.
        if (!pixel_valid || pixel_ready) begin
            out_reg_free = 1'b1;
        end else begin
            out_reg_free = 1'b0;
        end

        // Never issue a read while the previous one is still returning data.
        if (fifo_has_data && out_reg_free && !rd_pending && !pause_req) begin
            rd_fire = 1'b1;
        end else begin
            rd_fire = 1'b0;
        end
    end

    // ------------------------------------------------------------------
    // Boundary detection for the next in-frame coordinate.
    // ------------------------------------------------------------------
    always_comb begin
        active_pic_col_last = active_pic_col - 13'd1;
        active_pic_row_last = active_pic_row - 13'd1;

        if (x_count == 13'd0) begin
            x_is_first = 1'b1;
        end else begin
            x_is_first = 1'b0;
        end

        if (x_count == active_pic_col_last) begin
            x_is_last = 1'b1;
        end else begin
            x_is_last = 1'b0;
        end

        if (y_count == active_pic_row_last) begin
            y_is_last = 1'b1;
        end else begin
            y_is_last = 1'b0;
        end

        if (strip_row_count == 13'd0) begin
            strip_row_is_first = 1'b1;
        end else begin
            strip_row_is_first = 1'b0;
        end

        if (strip_row_count == SCAN_ROWS_LAST_VALUE) begin
            strip_row_is_last = 1'b1;
        end else begin
            strip_row_is_last = 1'b0;
        end

        if (x_is_first && strip_row_is_first) begin
            at_strip_origin = 1'b1;
        end else begin
            at_strip_origin = 1'b0;
        end
    end

    // ------------------------------------------------------------------
    // Registered pixel event, latched configuration and coordinate counters.
    // ------------------------------------------------------------------
    always_ff @(posedge clk) begin
        if (rst) begin
            rd_pending        <= 1'b0;
            image_active      <= 1'b0;
            x_count           <= 13'd0;
            y_count           <= 13'd0;
            strip_row_count   <= 13'd0;

            pixel_valid       <= 1'b0;
            pixel_sof         <= 1'b0;
            pixel_sample      <= {PIX_WIDTH{1'b0}};
            pixel_x           <= 13'd0;
            pixel_y           <= 13'd0;
            strip_first_pixel <= 1'b0;
            strip_last_pixel  <= 1'b0;
            image_first_pixel <= 1'b0;
            image_last_pixel  <= 1'b0;

            active_pic_col    <= DEFAULT_PIC_COL_VALUE;
            active_pic_row    <= DEFAULT_PIC_ROW_VALUE;
            active_ratio      <= 4'd0;
            active_cfg_valid  <= 1'b0;
        end else begin
            // A read issued now is consumed on the following cycle.
            rd_pending <= rd_fire;

            // Retire the current event once downstream takes it. The loads
            // below come later in the block and therefore take priority.
            if (pixel_valid && pixel_ready) begin
                pixel_valid <= 1'b0;
            end

            if (rd_pending) begin
                if (!image_active && word_sof) begin
                    // SOF word while idle: open a new image at (0, 0) and
                    // latch the configuration for its whole duration.
                    active_pic_col    <= col_to_latch;
                    active_pic_row    <= row_to_latch;
                    active_ratio      <= ratio;
                    active_cfg_valid  <= cfg_dimension_valid;
                    image_active      <= 1'b1;

                    pixel_valid       <= 1'b1;
                    pixel_sof         <= word_sof;
                    pixel_sample      <= word_sample;
                    pixel_x           <= 13'd0;
                    pixel_y           <= 13'd0;
                    strip_first_pixel <= 1'b1;
                    strip_last_pixel  <= 1'b0;
                    image_first_pixel <= 1'b1;
                    image_last_pixel  <= 1'b0;

                    // The next in-frame word is column 1 of row 0.
                    x_count           <= 13'd1;
                    y_count           <= 13'd0;
                    strip_row_count   <= 13'd0;
                end else if (image_active) begin
                    // In-frame word: tag it with the running coordinate.
                    pixel_valid       <= 1'b1;
                    pixel_sof         <= word_sof;
                    pixel_sample      <= word_sample;
                    pixel_x           <= x_count;
                    pixel_y           <= y_count;
                    strip_first_pixel <= at_strip_origin;
                    strip_last_pixel  <= x_is_last && strip_row_is_last;
                    image_first_pixel <= 1'b0;
                    image_last_pixel  <= x_is_last && y_is_last;

                    // Advance column; on the last column advance the row and
                    // strip-row counters, closing the image after the last row.
                    x_count <= x_is_last ? 13'd0 : (x_count + 13'd1);
                    if (x_is_last) begin
                        y_count <= y_is_last ? 13'd0 : (y_count + 13'd1);
                        strip_row_count <= (y_is_last || strip_row_is_last)
                                               ? 13'd0
                                               : (strip_row_count + 13'd1);
                        if (y_is_last) begin
                            image_active <= 1'b0;
                        end
                    end
                end
                // Words that arrive while idle without SOF are discarded.
            end
        end
    end
endmodule
`default_nettype wire

View File

@@ -0,0 +1,876 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.3 context determination, Annex A.7 run mode
// Figure : N/A
// Table : N/A
// Pseudocode : Select regular mode or run mode from local gradients
// Trace : docs/jls_traceability.md#run-mode
// Example : When D1=D2=D3=0 and X=Ra, a run pixel is reconstructed as Ra
// and the accumulated run_length is not entropy coded until the
// run reaches EOL or an interruption sample.
//
// First-pass mode router and run scanner. It consumes neighbor events from
// jls_neighbor_provider, sends non-run contexts to the regular pipeline, and
// accumulates run pixels for jls_run_mode segment encoding. To preserve
// entropy order in the later top-level integration, this module stalls regular,
// interruption, and EOL segment emission behind an outstanding run segment. It
// may still accept later non-EOL matching run pixels because those pixels only
// update run_length_accum and reconstructed history; they do not emit entropy
// until a later segment boundary.
// Once run_length_accum is non-zero, the scanner remains in the standard
// Annex A.7 run loop; the next pixel is judged against RUNval/Ra and EOL, not
// reclassified by the regular-mode gradient context.
`default_nettype none
module jls_mode_router #(
// Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
parameter int PIX_WIDTH = 16
) (
// Main 250 MHz clock.
input var logic clk,
// Synchronous active-high reset.
input var logic rst,
// Neighbor event from jls_neighbor_provider.
input var logic pixel_valid,
// High whenever the two-entry input queue below has a free entry.
output logic pixel_ready,
input var logic [PIX_WIDTH-1:0] pixel_sample,
input var logic [12:0] pixel_x,
input var logic [12:0] pixel_y,
input var logic pixel_strip_first_pixel,
input var logic pixel_strip_last_pixel,
input var logic [PIX_WIDTH-1:0] Ra,
input var logic [PIX_WIDTH-1:0] Rb,
input var logic [PIX_WIDTH-1:0] Rc,
input var logic [PIX_WIDTH-1:0] Rd,
// Active strip width and NEAR for mode/run decisions.
input var logic [12:0] strip_width,
input var logic [5:0] NEAR,
// Regular-mode event forwarded to jls_predictor.
output logic regular_valid,
input var logic regular_ready,
output logic [PIX_WIDTH-1:0] regular_sample,
output logic [12:0] regular_x,
output logic [12:0] regular_y,
output logic regular_strip_first_pixel,
output logic regular_strip_last_pixel,
output logic [PIX_WIDTH-1:0] regular_Ra,
output logic [PIX_WIDTH-1:0] regular_Rb,
output logic [PIX_WIDTH-1:0] regular_Rc,
output logic [PIX_WIDTH-1:0] regular_Rd,
// Run segment event for jls_run_mode. Asserted for end-of-line and
// interruption events only; plain run pixels do not raise it.
output logic run_segment_valid,
input var logic run_segment_ready,
output logic [12:0] run_length,
output logic run_end_of_line,
output logic run_interruption_valid,
output logic [PIX_WIDTH-1:0] run_interruption_sample,
output logic [12:0] run_interruption_x,
output logic [12:0] run_interruption_y,
output logic run_interruption_strip_first_pixel,
output logic run_interruption_strip_last_pixel,
output logic [PIX_WIDTH-1:0] run_Ra,
output logic [PIX_WIDTH-1:0] run_Rb,
// jls_run_mode segment completion. While a segment is outstanding
// (STATE_WAIT_SEG) only EVENT_RUN_PIXEL entries may enter the event queue;
// regular, interruption, and end-of-line entries wait in the classified slot
// until this input returns the router to STATE_IDLE. Input pixels are still
// accepted into the input queue during that time.
input var logic run_segment_done,
// Direct reconstructed run pixel. Run-interruption reconstruction comes from
// jls_run_mode and is muxed at top level with this port and the regular path.
output logic run_recon_valid,
input var logic run_recon_ready,
output logic [PIX_WIDTH-1:0] run_recon_sample,
output logic [12:0] run_recon_x,
output logic [12:0] run_recon_y
);
// Router state: IDLE lets any classified event enter the event queue;
// WAIT_SEG is entered when an end-of-line or interruption segment is enqueued
// and left when run_segment_done is seen.
typedef enum logic [1:0] {
STATE_IDLE = 2'd0,
STATE_WAIT_SEG = 2'd1
} router_state_e;
// Classification of one pixel:
//   EVENT_REGULAR      - not in run mode; forwarded on the regular_* port.
//   EVENT_RUN_PIXEL    - run continues; only a reconstructed pixel is emitted.
//   EVENT_RUN_EOF_LINE - run pixel that is also the last column of the line;
//                        emits a reconstructed pixel and a run segment.
//   EVENT_INTERRUPT    - sample breaks the run; emits a run segment carrying
//                        the interruption sample.
typedef enum logic [1:0] {
EVENT_REGULAR = 2'd0,
EVENT_RUN_PIXEL = 2'd1,
EVENT_RUN_EOF_LINE = 2'd2,
EVENT_INTERRUPT = 2'd3
} event_kind_e;
router_state_e state;
event_kind_e event_kind;
event_kind_e event_kind_next;
// Accumulated run length since the current run-mode segment started.
logic [12:0] run_length_accum;
// Two-entry elastic input stage. This breaks the long combinational path
// from neighbor history values through Annex A.3/A.7 decisions into the
// downstream entropy ready chain. The second slot lets pixel_ready depend
// only on local queue fullness instead of same-cycle downstream event_accept.
// slot_* is the queue head (the entry being classified); next_slot_* is the
// entry behind it.
logic slot_valid;
logic [PIX_WIDTH-1:0] slot_sample;
logic [12:0] slot_x;
logic [12:0] slot_y;
logic slot_strip_first_pixel;
logic slot_strip_last_pixel;
logic [PIX_WIDTH-1:0] slot_Ra;
logic [PIX_WIDTH-1:0] slot_Rb;
logic [PIX_WIDTH-1:0] slot_Rc;
logic [PIX_WIDTH-1:0] slot_Rd;
logic signed [32:0] slot_D1;
logic signed [32:0] slot_D2;
logic signed [32:0] slot_D3;
logic signed [32:0] slot_sample_minus_Ra;
logic slot_pixel_is_eol;
logic next_slot_valid;
logic [PIX_WIDTH-1:0] next_slot_sample;
logic [12:0] next_slot_x;
logic [12:0] next_slot_y;
logic next_slot_strip_first_pixel;
logic next_slot_strip_last_pixel;
logic [PIX_WIDTH-1:0] next_slot_Ra;
logic [PIX_WIDTH-1:0] next_slot_Rb;
logic [PIX_WIDTH-1:0] next_slot_Rc;
logic [PIX_WIDTH-1:0] next_slot_Rd;
logic signed [32:0] next_slot_D1;
logic signed [32:0] next_slot_D2;
logic signed [32:0] next_slot_D3;
logic signed [32:0] next_slot_sample_minus_Ra;
logic next_slot_pixel_is_eol;
// One-entry classified slot. Annex A.3/A.7 gradient and run decisions are
// registered here before the event queue sees them; this avoids driving the
// event register enables directly from Ra/Rb/Rc/Rd comparison logic.
logic class_valid;
logic class_slot_open;
logic class_can_enqueue;
logic class_to_event;
logic [PIX_WIDTH-1:0] class_sample;
logic [12:0] class_x;
logic [12:0] class_y;
logic class_strip_first_pixel;
logic class_strip_last_pixel;
logic [PIX_WIDTH-1:0] class_Ra;
logic [PIX_WIDTH-1:0] class_Rb;
logic [PIX_WIDTH-1:0] class_Rc;
logic [PIX_WIDTH-1:0] class_Rd;
event_kind_e class_kind;
logic [12:0] class_run_length;
logic class_run_end_of_line;
logic class_run_interruption_valid;
// Registered classified event. This is the timing boundary between Annex
// A.3/A.7 mode decision and the downstream regular/run entropy pipelines.
logic event_valid;
logic [PIX_WIDTH-1:0] event_sample;
logic [12:0] event_x;
logic [12:0] event_y;
logic event_strip_first_pixel;
logic event_strip_last_pixel;
logic [PIX_WIDTH-1:0] event_Ra;
logic [PIX_WIDTH-1:0] event_Rb;
logic [PIX_WIDTH-1:0] event_Rc;
logic [PIX_WIDTH-1:0] event_Rd;
logic [12:0] event_run_length;
logic event_run_end_of_line;
logic event_run_interruption_valid;
// Combinational classification results for the queue head; despite the
// event_ prefix these are captured into the class_* registers, not directly
// into the event_* registers.
logic [12:0] event_run_length_next;
logic event_run_end_of_line_next;
logic event_run_interruption_valid_next;
// Second event slot. A filled second slot lets mode classification advance
// without using the downstream ready chain in the same cycle.
logic event_next_valid;
logic [PIX_WIDTH-1:0] event_next_sample;
logic [12:0] event_next_x;
logic [12:0] event_next_y;
logic event_next_strip_first_pixel;
logic event_next_strip_last_pixel;
logic [PIX_WIDTH-1:0] event_next_Ra;
logic [PIX_WIDTH-1:0] event_next_Rb;
logic [PIX_WIDTH-1:0] event_next_Rc;
logic [PIX_WIDTH-1:0] event_next_Rd;
event_kind_e event_next_kind;
logic [12:0] event_next_run_length;
logic event_next_run_end_of_line;
logic event_next_run_interruption_valid;
// Current input-pixel arithmetic. Only the first subtract layer is computed
// before the slot register; the absolute-value / compare tree for Annex
// A.3/A.7 runs one cycle later from slot_D1/slot_D2/slot_D3.
logic signed [32:0] pixel_Ra_ext;
logic signed [32:0] pixel_Rb_ext;
logic signed [32:0] pixel_Rc_ext;
logic signed [32:0] pixel_Rd_ext;
logic signed [32:0] pixel_sample_ext;
logic signed [32:0] near_ext33;
logic signed [32:0] pixel_D1;
logic signed [32:0] pixel_D2;
logic signed [32:0] pixel_D3;
logic signed [32:0] pixel_sample_minus_Ra;
logic signed [32:0] slot_abs_D1;
logic signed [32:0] slot_abs_D2;
logic signed [32:0] slot_abs_D3;
logic signed [32:0] slot_abs_sample_minus_Ra;
logic slot_run_context_eval;
logic slot_sample_matches_Ra_eval;
logic pixel_is_eol_next;
logic run_mode_active;
// Handshake and queue-control strobes.
logic slot_to_class;
logic event_accept;
logic slot_accept;
logic load_pixel;
logic input_queue_full;
logic run_pixel_accept;
logic interruption_accept;
logic regular_accept;
logic run_eol_segment_accept;
logic [12:0] run_length_with_current;
logic event_queue_full;
logic promote_next_event;
logic load_event_front_from_class;
logic load_event_next_from_class;
// Padding for supported PIX_WIDTH values into 33-bit signed arithmetic.
localparam int SAMPLE_EXT_PAD_WIDTH = 33 - PIX_WIDTH;
// Zero-extend the unsigned samples and NEAR to 33-bit signed values so the
// differences below can go negative without wrapping.
always_comb begin
pixel_Ra_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Ra});
pixel_Rb_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Rb});
pixel_Rc_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Rc});
pixel_Rd_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Rd});
pixel_sample_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, pixel_sample});
near_ext33 = $signed({27'd0, NEAR});
end
// Local gradients D1 = Rd - Rb, D2 = Rb - Rc, D3 = Rc - Ra, plus the
// sample-to-Ra difference used by the run test. These are registered with the
// pixel when it enters the input queue.
always_comb begin
pixel_D1 = pixel_Rd_ext - pixel_Rb_ext;
pixel_D2 = pixel_Rb_ext - pixel_Rc_ext;
pixel_D3 = pixel_Rc_ext - pixel_Ra_ext;
pixel_sample_minus_Ra = pixel_sample_ext - pixel_Ra_ext;
end
// Absolute values of the registered differences at the queue head.
always_comb begin
slot_abs_D1 = slot_D1;
if (slot_D1 < 33'sd0) begin
slot_abs_D1 = -slot_D1;
end
end
always_comb begin
slot_abs_D2 = slot_D2;
if (slot_D2 < 33'sd0) begin
slot_abs_D2 = -slot_D2;
end
end
always_comb begin
slot_abs_D3 = slot_D3;
if (slot_D3 < 33'sd0) begin
slot_abs_D3 = -slot_D3;
end
end
always_comb begin
slot_abs_sample_minus_Ra = slot_sample_minus_Ra;
if (slot_sample_minus_Ra < 33'sd0) begin
slot_abs_sample_minus_Ra = -slot_sample_minus_Ra;
end
end
// Run context: all three gradient magnitudes are within NEAR.
always_comb begin
slot_run_context_eval = 1'b0;
if (slot_abs_D1 <= near_ext33 &&
slot_abs_D2 <= near_ext33 &&
slot_abs_D3 <= near_ext33) begin
slot_run_context_eval = 1'b1;
end
end
// Run continuation test: |sample - Ra| is within NEAR.
always_comb begin
slot_sample_matches_Ra_eval = 1'b0;
if (slot_abs_sample_minus_Ra <= near_ext33) begin
slot_sample_matches_Ra_eval = 1'b1;
end
end
// End-of-line flag for the incoming pixel (last column of the strip width).
always_comb begin
pixel_is_eol_next = 1'b0;
if (pixel_x == (strip_width - 13'd1)) begin
pixel_is_eol_next = 1'b1;
end
end
// The head pixel is handled in run mode when its gradients form a run context
// or when a run is already in progress (non-zero accumulated length).
always_comb begin
run_mode_active = slot_run_context_eval;
if (run_length_accum != 13'd0) begin
run_mode_active = 1'b1;
end
end
always_comb begin
run_length_with_current = run_length_accum + 13'd1;
end
// Output valid decode from the kind of the event at the front of the queue.
always_comb begin
regular_valid = 1'b0;
if (event_valid && event_kind == EVENT_REGULAR) begin
regular_valid = 1'b1;
end
end
// Run pixels and end-of-line run pixels both emit a reconstructed pixel.
always_comb begin
run_recon_valid = 1'b0;
if (event_valid &&
(event_kind == EVENT_RUN_PIXEL || event_kind == EVENT_RUN_EOF_LINE)) begin
run_recon_valid = 1'b1;
end
end
// End-of-line and interruption events both emit a run segment.
always_comb begin
run_segment_valid = 1'b0;
if (event_valid &&
(event_kind == EVENT_RUN_EOF_LINE || event_kind == EVENT_INTERRUPT)) begin
run_segment_valid = 1'b1;
end
end
// Per-kind completion of the front event.
always_comb begin
regular_accept = 1'b0;
if (regular_valid && regular_ready) begin
regular_accept = 1'b1;
end
end
always_comb begin
run_pixel_accept = 1'b0;
if (run_recon_valid && run_recon_ready && event_kind == EVENT_RUN_PIXEL) begin
run_pixel_accept = 1'b1;
end
end
// An end-of-line event drives both run_recon_valid and run_segment_valid and
// is retired only in a cycle where both ready inputs are high.
// NOTE(review): run_recon_valid does not depend on run_segment_ready (and vice
// versa), so a sink that is ready alone sees valid && ready while the event
// stays in place. Confirm the top level tolerates or prevents that.
always_comb begin
run_eol_segment_accept = 1'b0;
if (run_recon_valid && run_recon_ready && run_segment_valid && run_segment_ready &&
event_kind == EVENT_RUN_EOF_LINE) begin
run_eol_segment_accept = 1'b1;
end
end
always_comb begin
interruption_accept = 1'b0;
if (run_segment_valid && run_segment_ready && event_kind == EVENT_INTERRUPT) begin
interruption_accept = 1'b1;
end
end
// The front event is retired when the accept condition for its kind fires.
// Each *_accept term is qualified by a distinct event_kind, so at most one
// can be high at a time.
always_comb begin
event_accept = 1'b0;
case (1'b1)
(regular_accept): begin
event_accept = 1'b1;
end
(run_pixel_accept): begin
event_accept = 1'b1;
end
(run_eol_segment_accept): begin
event_accept = 1'b1;
end
(interruption_accept): begin
event_accept = 1'b1;
end
default: begin
event_accept = 1'b0;
end
endcase
end
// Alias of slot_to_class; not read anywhere else in this module.
always_comb begin
slot_accept = slot_to_class;
end
// Classify the queue head. Outside run mode the pixel is regular. In run mode
// a matching sample continues the run (or closes it at end of line) and a
// non-matching sample interrupts it.
always_comb begin
event_kind_next = EVENT_REGULAR;
if (run_mode_active) begin
if (slot_sample_matches_Ra_eval && slot_pixel_is_eol) begin
event_kind_next = EVENT_RUN_EOF_LINE;
end else if (slot_sample_matches_Ra_eval) begin
event_kind_next = EVENT_RUN_PIXEL;
end else begin
event_kind_next = EVENT_INTERRUPT;
end
end
end
// Run attributes stored with the classified pixel. Run and end-of-line pixels
// carry the length including the current pixel; an interruption carries the
// length accumulated before it and sets the interruption flag.
always_comb begin
event_run_length_next = run_length_accum;
event_run_end_of_line_next = 1'b0;
event_run_interruption_valid_next = 1'b1;
if (event_kind_next == EVENT_RUN_EOF_LINE) begin
event_run_length_next = run_length_with_current;
event_run_end_of_line_next = 1'b1;
event_run_interruption_valid_next = 1'b0;
end else if (event_kind_next == EVENT_RUN_PIXEL) begin
event_run_length_next = run_length_with_current;
event_run_end_of_line_next = 1'b0;
event_run_interruption_valid_next = 1'b0;
end else if (event_kind_next == EVENT_REGULAR) begin
event_run_interruption_valid_next = 1'b0;
end
end
// Queue fullness flags.
always_comb begin
input_queue_full = 1'b0;
if (slot_valid && next_slot_valid) begin
input_queue_full = 1'b1;
end
end
always_comb begin
event_queue_full = 1'b0;
if (event_valid && event_next_valid) begin
event_queue_full = 1'b1;
end
end
// The classified slot can be (re)loaded when it is empty, or in the same cycle
// that a REGULAR entry leaves it. Run-type entries update run_length_accum
// when they leave, and the classification of the next head reads
// run_length_accum, so presumably the back-to-back reload is restricted to
// REGULAR entries to keep that value current - confirm against the design notes.
always_comb begin
class_slot_open = 1'b0;
if (!class_valid || (class_to_event && class_kind == EVENT_REGULAR)) begin
class_slot_open = 1'b1;
end
end
always_comb begin
slot_to_class = 1'b0;
if (slot_valid && class_slot_open) begin
slot_to_class = 1'b1;
end
end
// Entropy-order gate: while a run segment is outstanding only plain run pixels
// may enter the event queue.
always_comb begin
class_can_enqueue = 1'b0;
case (state)
STATE_IDLE: begin
class_can_enqueue = 1'b1;
end
STATE_WAIT_SEG: begin
if (class_kind == EVENT_RUN_PIXEL) begin
class_can_enqueue = 1'b1;
end
end
default: begin
class_can_enqueue = 1'b0;
end
endcase
end
always_comb begin
class_to_event = 1'b0;
if (class_valid && class_can_enqueue && !event_queue_full) begin
class_to_event = 1'b1;
end
end
// Event queue steering. The second entry moves to the front when the front is
// empty; a classified entry goes to the front only when both entries are
// empty, otherwise it goes to the second entry.
always_comb begin
promote_next_event = 1'b0;
if (!event_valid && event_next_valid) begin
promote_next_event = 1'b1;
end
end
always_comb begin
load_event_front_from_class = 1'b0;
if (class_to_event && !event_valid && !event_next_valid) begin
load_event_front_from_class = 1'b1;
end
end
always_comb begin
load_event_next_from_class = 1'b0;
if (class_to_event && (event_valid || event_next_valid)) begin
load_event_next_from_class = 1'b1;
end
end
// Upstream back-pressure depends only on input queue occupancy.
always_comb begin
pixel_ready = 1'b0;
if (!input_queue_full) begin
pixel_ready = 1'b1;
end
end
always_comb begin
load_pixel = 1'b0;
if (pixel_valid && pixel_ready) begin
load_pixel = 1'b1;
end
end
// All external payload outputs are driven from the front event register.
always_comb begin
regular_sample = event_sample;
regular_x = event_x;
regular_y = event_y;
regular_strip_first_pixel = event_strip_first_pixel;
regular_strip_last_pixel = event_strip_last_pixel;
regular_Ra = event_Ra;
regular_Rb = event_Rb;
regular_Rc = event_Rc;
regular_Rd = event_Rd;
end
always_comb begin
run_length = event_run_length;
run_end_of_line = event_run_end_of_line;
run_interruption_valid = event_run_interruption_valid;
end
always_comb begin
run_interruption_sample = event_sample;
run_interruption_x = event_x;
run_interruption_y = event_y;
run_interruption_strip_first_pixel = event_strip_first_pixel;
run_interruption_strip_last_pixel = event_strip_last_pixel;
run_Ra = event_Ra;
run_Rb = event_Rb;
end
// A run pixel is reconstructed as Ra at the pixel's own coordinate.
always_comb begin
run_recon_sample = event_Ra;
run_recon_x = event_x;
run_recon_y = event_y;
end
/*
* The event register above owns all external regular/run outputs. The older
* direct slot-to-output combinational blocks are intentionally absent; this
* keeps slot_Ra/Rb/Rc/Rd comparison logic from feeding top-level entropy
* counters or run-mode DSP inputs in the same cycle.
*/
// Sequential state. The order of the statements in the non-reset branch is
// significant: where two statements assign the same register in one cycle, the
// later non-blocking assignment takes effect.
always_ff @(posedge clk) begin
if (rst) begin
state <= STATE_IDLE;
run_length_accum <= 13'd0;
slot_valid <= 1'b0;
slot_sample <= {PIX_WIDTH{1'b0}};
slot_x <= 13'd0;
slot_y <= 13'd0;
slot_strip_first_pixel <= 1'b0;
slot_strip_last_pixel <= 1'b0;
slot_Ra <= {PIX_WIDTH{1'b0}};
slot_Rb <= {PIX_WIDTH{1'b0}};
slot_Rc <= {PIX_WIDTH{1'b0}};
slot_Rd <= {PIX_WIDTH{1'b0}};
slot_D1 <= 33'sd0;
slot_D2 <= 33'sd0;
slot_D3 <= 33'sd0;
slot_sample_minus_Ra <= 33'sd0;
slot_pixel_is_eol <= 1'b0;
next_slot_valid <= 1'b0;
next_slot_sample <= {PIX_WIDTH{1'b0}};
next_slot_x <= 13'd0;
next_slot_y <= 13'd0;
next_slot_strip_first_pixel <= 1'b0;
next_slot_strip_last_pixel <= 1'b0;
next_slot_Ra <= {PIX_WIDTH{1'b0}};
next_slot_Rb <= {PIX_WIDTH{1'b0}};
next_slot_Rc <= {PIX_WIDTH{1'b0}};
next_slot_Rd <= {PIX_WIDTH{1'b0}};
next_slot_D1 <= 33'sd0;
next_slot_D2 <= 33'sd0;
next_slot_D3 <= 33'sd0;
next_slot_sample_minus_Ra <= 33'sd0;
next_slot_pixel_is_eol <= 1'b0;
class_valid <= 1'b0;
class_sample <= {PIX_WIDTH{1'b0}};
class_x <= 13'd0;
class_y <= 13'd0;
class_strip_first_pixel <= 1'b0;
class_strip_last_pixel <= 1'b0;
class_Ra <= {PIX_WIDTH{1'b0}};
class_Rb <= {PIX_WIDTH{1'b0}};
class_Rc <= {PIX_WIDTH{1'b0}};
class_Rd <= {PIX_WIDTH{1'b0}};
class_kind <= EVENT_REGULAR;
class_run_length <= 13'd0;
class_run_end_of_line <= 1'b0;
class_run_interruption_valid <= 1'b0;
event_kind <= EVENT_REGULAR;
event_valid <= 1'b0;
event_sample <= {PIX_WIDTH{1'b0}};
event_x <= 13'd0;
event_y <= 13'd0;
event_strip_first_pixel <= 1'b0;
event_strip_last_pixel <= 1'b0;
event_Ra <= {PIX_WIDTH{1'b0}};
event_Rb <= {PIX_WIDTH{1'b0}};
event_Rc <= {PIX_WIDTH{1'b0}};
event_Rd <= {PIX_WIDTH{1'b0}};
event_run_length <= 13'd0;
event_run_end_of_line <= 1'b0;
event_run_interruption_valid <= 1'b0;
event_next_valid <= 1'b0;
event_next_sample <= {PIX_WIDTH{1'b0}};
event_next_x <= 13'd0;
event_next_y <= 13'd0;
event_next_strip_first_pixel <= 1'b0;
event_next_strip_last_pixel <= 1'b0;
event_next_Ra <= {PIX_WIDTH{1'b0}};
event_next_Rb <= {PIX_WIDTH{1'b0}};
event_next_Rc <= {PIX_WIDTH{1'b0}};
event_next_Rd <= {PIX_WIDTH{1'b0}};
event_next_kind <= EVENT_REGULAR;
event_next_run_length <= 13'd0;
event_next_run_end_of_line <= 1'b0;
event_next_run_interruption_valid <= 1'b0;
end else begin
// Segment finished: allow all event kinds again. A new segment cannot be
// enqueued in the same cycle because class_can_enqueue only admits
// EVENT_RUN_PIXEL while in STATE_WAIT_SEG.
if (state == STATE_WAIT_SEG && run_segment_done) begin
state <= STATE_IDLE;
end
// Default clears; the load statements further down override them when a new
// entry arrives in the same cycle.
if (event_accept) begin
event_valid <= 1'b0;
end
if (class_to_event) begin
class_valid <= 1'b0;
end
// Capture the classification of the input-queue head.
if (slot_to_class) begin
class_valid <= 1'b1;
class_sample <= slot_sample;
class_x <= slot_x;
class_y <= slot_y;
class_strip_first_pixel <= slot_strip_first_pixel;
class_strip_last_pixel <= slot_strip_last_pixel;
class_Ra <= slot_Ra;
class_Rb <= slot_Rb;
class_Rc <= slot_Rc;
class_Rd <= slot_Rd;
class_kind <= event_kind_next;
class_run_length <= event_run_length_next;
class_run_end_of_line <= event_run_end_of_line_next;
class_run_interruption_valid <= event_run_interruption_valid_next;
end
// Move the second event entry to the front when the front is empty.
if (promote_next_event) begin
event_valid <= 1'b1;
event_kind <= event_next_kind;
event_sample <= event_next_sample;
event_x <= event_next_x;
event_y <= event_next_y;
event_strip_first_pixel <= event_next_strip_first_pixel;
event_strip_last_pixel <= event_next_strip_last_pixel;
event_Ra <= event_next_Ra;
event_Rb <= event_next_Rb;
event_Rc <= event_next_Rc;
event_Rd <= event_next_Rd;
event_run_length <= event_next_run_length;
event_run_end_of_line <= event_next_run_end_of_line;
event_run_interruption_valid <= event_next_run_interruption_valid;
event_next_valid <= 1'b0;
end
// Classified entry goes straight to the front when the event queue is empty.
if (load_event_front_from_class) begin
event_valid <= 1'b1;
event_kind <= class_kind;
event_sample <= class_sample;
event_x <= class_x;
event_y <= class_y;
event_strip_first_pixel <= class_strip_first_pixel;
event_strip_last_pixel <= class_strip_last_pixel;
event_Ra <= class_Ra;
event_Rb <= class_Rb;
event_Rc <= class_Rc;
event_Rd <= class_Rd;
event_run_length <= class_run_length;
event_run_end_of_line <= class_run_end_of_line;
event_run_interruption_valid <= class_run_interruption_valid;
end
// Otherwise it fills the second entry. This follows the promote block so that
// a simultaneous promote and load leaves event_next_valid set.
if (load_event_next_from_class) begin
event_next_valid <= 1'b1;
event_next_kind <= class_kind;
event_next_sample <= class_sample;
event_next_x <= class_x;
event_next_y <= class_y;
event_next_strip_first_pixel <= class_strip_first_pixel;
event_next_strip_last_pixel <= class_strip_last_pixel;
event_next_Ra <= class_Ra;
event_next_Rb <= class_Rb;
event_next_Rc <= class_Rc;
event_next_Rd <= class_Rd;
event_next_run_length <= class_run_length;
event_next_run_end_of_line <= class_run_end_of_line;
event_next_run_interruption_valid <= class_run_interruption_valid;
end
// Run bookkeeping is committed when the classified entry enters the event
// queue: run pixels extend the accumulated length; end-of-line and
// interruption entries close the run and start waiting for the segment.
if (class_to_event) begin
case (class_kind)
EVENT_RUN_PIXEL: begin
run_length_accum <= class_run_length;
end
EVENT_RUN_EOF_LINE, EVENT_INTERRUPT: begin
run_length_accum <= 13'd0;
state <= STATE_WAIT_SEG;
end
default: begin
run_length_accum <= run_length_accum;
end
endcase
end
// Input queue update. Selector bits, MSB first:
//   {head leaves, head valid, second valid, new pixel accepted}.
// load_pixel requires pixel_ready, which is low when both entries are valid,
// so the 4'b1111 and 4'b0111 items cannot be selected.
case ({slot_to_class, slot_valid, next_slot_valid, load_pixel})
// Head leaves, nothing behind it, no arrival: queue becomes empty.
4'b1100: begin
slot_valid <= 1'b0;
end
// Head leaves, nothing behind it, arrival: new pixel becomes the head.
4'b1101: begin
slot_valid <= 1'b1;
slot_sample <= pixel_sample;
slot_x <= pixel_x;
slot_y <= pixel_y;
slot_strip_first_pixel <= pixel_strip_first_pixel;
slot_strip_last_pixel <= pixel_strip_last_pixel;
slot_Ra <= Ra;
slot_Rb <= Rb;
slot_Rc <= Rc;
slot_Rd <= Rd;
slot_D1 <= pixel_D1;
slot_D2 <= pixel_D2;
slot_D3 <= pixel_D3;
slot_sample_minus_Ra <= pixel_sample_minus_Ra;
slot_pixel_is_eol <= pixel_is_eol_next;
end
// Head leaves, second entry present, no arrival: second entry moves up.
4'b1110: begin
slot_valid <= 1'b1;
slot_sample <= next_slot_sample;
slot_x <= next_slot_x;
slot_y <= next_slot_y;
slot_strip_first_pixel <= next_slot_strip_first_pixel;
slot_strip_last_pixel <= next_slot_strip_last_pixel;
slot_Ra <= next_slot_Ra;
slot_Rb <= next_slot_Rb;
slot_Rc <= next_slot_Rc;
slot_Rd <= next_slot_Rd;
slot_D1 <= next_slot_D1;
slot_D2 <= next_slot_D2;
slot_D3 <= next_slot_D3;
slot_sample_minus_Ra <= next_slot_sample_minus_Ra;
slot_pixel_is_eol <= next_slot_pixel_is_eol;
next_slot_valid <= 1'b0;
end
// Head leaves, second entry moves up, arrival refills the second entry.
4'b1111: begin
slot_valid <= 1'b1;
slot_sample <= next_slot_sample;
slot_x <= next_slot_x;
slot_y <= next_slot_y;
slot_strip_first_pixel <= next_slot_strip_first_pixel;
slot_strip_last_pixel <= next_slot_strip_last_pixel;
slot_Ra <= next_slot_Ra;
slot_Rb <= next_slot_Rb;
slot_Rc <= next_slot_Rc;
slot_Rd <= next_slot_Rd;
slot_D1 <= next_slot_D1;
slot_D2 <= next_slot_D2;
slot_D3 <= next_slot_D3;
slot_sample_minus_Ra <= next_slot_sample_minus_Ra;
slot_pixel_is_eol <= next_slot_pixel_is_eol;
next_slot_valid <= 1'b1;
next_slot_sample <= pixel_sample;
next_slot_x <= pixel_x;
next_slot_y <= pixel_y;
next_slot_strip_first_pixel <= pixel_strip_first_pixel;
next_slot_strip_last_pixel <= pixel_strip_last_pixel;
next_slot_Ra <= Ra;
next_slot_Rb <= Rb;
next_slot_Rc <= Rc;
next_slot_Rd <= Rd;
next_slot_D1 <= pixel_D1;
next_slot_D2 <= pixel_D2;
next_slot_D3 <= pixel_D3;
next_slot_sample_minus_Ra <= pixel_sample_minus_Ra;
next_slot_pixel_is_eol <= pixel_is_eol_next;
end
// Queue empty, arrival: new pixel becomes the head.
4'b0001: begin
slot_valid <= 1'b1;
slot_sample <= pixel_sample;
slot_x <= pixel_x;
slot_y <= pixel_y;
slot_strip_first_pixel <= pixel_strip_first_pixel;
slot_strip_last_pixel <= pixel_strip_last_pixel;
slot_Ra <= Ra;
slot_Rb <= Rb;
slot_Rc <= Rc;
slot_Rd <= Rd;
slot_D1 <= pixel_D1;
slot_D2 <= pixel_D2;
slot_D3 <= pixel_D3;
slot_sample_minus_Ra <= pixel_sample_minus_Ra;
slot_pixel_is_eol <= pixel_is_eol_next;
end
// Head stays, arrival: new pixel is written to the second entry.
4'b0101, 4'b0111: begin
next_slot_valid <= 1'b1;
next_slot_sample <= pixel_sample;
next_slot_x <= pixel_x;
next_slot_y <= pixel_y;
next_slot_strip_first_pixel <= pixel_strip_first_pixel;
next_slot_strip_last_pixel <= pixel_strip_last_pixel;
next_slot_Ra <= Ra;
next_slot_Rb <= Rb;
next_slot_Rc <= Rc;
next_slot_Rd <= Rd;
next_slot_D1 <= pixel_D1;
next_slot_D2 <= pixel_D2;
next_slot_D3 <= pixel_D3;
next_slot_sample_minus_Ra <= pixel_sample_minus_Ra;
next_slot_pixel_is_eol <= pixel_is_eol_next;
end
// No movement: hold both valid flags.
// NOTE(review): selector 4'b0011 (head empty, second valid, arrival) also
// lands here and the arriving pixel would not be stored. None of the case
// items above leaves the queue in the head-empty/second-valid state, so this
// looks unreachable - confirm.
default: begin
slot_valid <= slot_valid;
next_slot_valid <= next_slot_valid;
end
endcase
// Recovery path: if the head is empty while the second entry is valid and
// nothing else is moving, shift the second entry up to the head.
if (!slot_valid && next_slot_valid && !slot_to_class && !load_pixel) begin
slot_valid <= 1'b1;
slot_sample <= next_slot_sample;
slot_x <= next_slot_x;
slot_y <= next_slot_y;
slot_strip_first_pixel <= next_slot_strip_first_pixel;
slot_strip_last_pixel <= next_slot_strip_last_pixel;
slot_Ra <= next_slot_Ra;
slot_Rb <= next_slot_Rb;
slot_Rc <= next_slot_Rc;
slot_Rd <= next_slot_Rd;
slot_D1 <= next_slot_D1;
slot_D2 <= next_slot_D2;
slot_D3 <= next_slot_D3;
slot_sample_minus_Ra <= next_slot_sample_minus_Ra;
slot_pixel_is_eol <= next_slot_pixel_is_eol;
next_slot_valid <= 1'b0;
end
end
end
endmodule
`default_nettype wire

View File

@@ -0,0 +1,264 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex C.2.3 scan header NEAR parameter; Annex A uses NEAR in coding
// Figure : N/A
// Table : N/A
// Pseudocode : Project dynamic NEAR control around the standard NEAR parameter
// Trace : docs/jls_traceability.md#dynamic-near-control
// Example : For ratio=2, target bits are source bits divided by 4.
//
// Dynamic NEAR controller. This project-specific controller keeps NEAR at 0
// for lossless/invalid ratios and applies a simple cumulative actual-vs-target
// step after each standalone strip frame is fully output.
`default_nettype none
module jls_near_ctrl #(
  // Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16,
  // Maximum dynamic NEAR allowed by the first RTL version. Values outside
  // 0..63 are saturated to the range of the 6-bit current_near output.
  parameter int MAX_NEAR = 31
) (
  // Main 250 MHz clock.
  input var logic clk,
  // Synchronous active-high reset.
  input var logic rst,
  // First strip of a new original image; resets dynamic NEAR to 0.
  input var logic image_start_valid,
  // Runtime ratio sampled for the new original image.
  input var logic [3:0] image_ratio,
  // Current strip frame is completely output, including header, payload, and EOI.
  input var logic strip_done_valid,
  // Number of original-image pixels in the completed strip frame.
  input var logic [31:0] strip_pixel_count,
  // Number of output bytes generated by the completed strip frame.
  input var logic [31:0] strip_output_bytes,
  // NEAR value to use for the next strip frame header and coding pipeline.
  output logic [5:0] current_near,
  // Cumulative actual output bits for verification and reporting.
  output logic [47:0] actual_bits_cumulative,
  // Cumulative target bits for verification and reporting.
  output logic [47:0] target_bits_cumulative,
  // Sticky report flag: target still missed while NEAR was already at MAX_NEAR.
  output logic target_miss_at_max_near,
  // One-cycle delayed update is active. The top level holds the next strip
  // start while this is high so the scan header observes the updated NEAR.
  output logic update_busy
);
  // Ratio encodings follow the SRS ratio port definition.
  localparam logic [3:0] RATIO_LOSSLESS = 4'd0;
  localparam logic [3:0] RATIO_1_TO_2 = 4'd1;
  localparam logic [3:0] RATIO_1_TO_4 = 4'd2;
  localparam logic [3:0] RATIO_1_TO_8 = 4'd3;

  // MAX_NEAR saturated into the 6-bit NEAR range. A plain MAX_NEAR[5:0] bit
  // select would wrap instead (for example 64 -> 0), which makes near_is_max
  // permanently true and pins NEAR at 0 while raising the miss flag.
  // NOTE(review): jls_near_scale_mul takes a 6-bit (2*NEAR+1) scale, which only
  // covers NEAR <= 31; confirm downstream widths before raising MAX_NEAR.
  localparam int MAX_NEAR_CLAMPED =
      (MAX_NEAR < 0) ? 0 : ((MAX_NEAR > 63) ? 63 : MAX_NEAR);
  localparam logic [5:0] MAX_NEAR_VALUE = MAX_NEAR_CLAMPED[5:0];

  // Latched ratio for the current original image.
  logic [3:0] active_ratio;

  // Strip-level source and target bit calculations.
  logic [47:0] strip_pixel_count_ext;
  logic [47:0] strip_source_bits;
  logic [47:0] strip_target_bits;
  logic [47:0] strip_actual_bits;

  // Cumulative sums after adding the current completed strip.
  logic [47:0] actual_bits_sum;
  logic [47:0] target_bits_sum;

  // Registered strip-completion update. This splits the 48-bit adders from
  // the actual-vs-target compare and NEAR step logic for 250 MHz timing.
  logic pending_update_valid;
  logic [47:0] pending_actual_bits_sum;
  logic [47:0] pending_target_bits_sum;
  logic pending_ratio_is_lossless_or_invalid;

  // Ratio classification and NEAR update decisions.
  logic ratio_is_lossless_or_invalid;
  logic actual_over_target;
  logic actual_under_target;
  logic near_can_increase;
  logic near_can_decrease;
  logic near_is_max;

  // update_busy mirrors the one-cycle pending stage.
  always_comb begin
    update_busy = pending_update_valid;
  end

  // Zero-extend the pixel count to the 48-bit accumulator width.
  always_comb begin
    strip_pixel_count_ext = {16'd0, strip_pixel_count};
  end

  // Source bits = pixel count * PIX_WIDTH, built from shifts and adds:
  // 8 = x8, 10 = x8 + x2, 12 = x8 + x4, 14 = x8 + x4 + x2, 16 = x16.
  // Any other PIX_WIDTH falls back to the 16-bit formula.
  always_comb begin
    strip_source_bits = {48{1'b0}};
    case (PIX_WIDTH)
      8: begin
        strip_source_bits = {strip_pixel_count_ext[44:0], 3'b000};
      end
      10: begin
        strip_source_bits = {strip_pixel_count_ext[44:0], 3'b000} +
                            {strip_pixel_count_ext[46:0], 1'b0};
      end
      12: begin
        strip_source_bits = {strip_pixel_count_ext[44:0], 3'b000} +
                            {strip_pixel_count_ext[45:0], 2'b00};
      end
      14: begin
        strip_source_bits = {strip_pixel_count_ext[44:0], 3'b000} +
                            {strip_pixel_count_ext[45:0], 2'b00} +
                            {strip_pixel_count_ext[46:0], 1'b0};
      end
      16: begin
        strip_source_bits = {strip_pixel_count_ext[43:0], 4'b0000};
      end
      default: begin
        strip_source_bits = {strip_pixel_count_ext[43:0], 4'b0000};
      end
    endcase
  end

  // Target bits = source bits divided by 2, 4, or 8 for the lossy ratios.
  // Lossless and unknown ratio codes leave the target equal to the source.
  always_comb begin
    strip_target_bits = strip_source_bits;
    case (active_ratio)
      RATIO_1_TO_2: begin
        strip_target_bits = {1'b0, strip_source_bits[47:1]};
      end
      RATIO_1_TO_4: begin
        strip_target_bits = {2'b00, strip_source_bits[47:2]};
      end
      RATIO_1_TO_8: begin
        strip_target_bits = {3'b000, strip_source_bits[47:3]};
      end
      default: begin
        strip_target_bits = strip_source_bits;
      end
    endcase
  end

  // Actual bits = output bytes * 8.
  always_comb begin
    strip_actual_bits = {13'd0, strip_output_bytes, 3'b000};
  end

  // Running totals including the strip that is completing now.
  always_comb begin
    actual_bits_sum = actual_bits_cumulative + strip_actual_bits;
    target_bits_sum = target_bits_cumulative + strip_target_bits;
  end

  // Only the three lossy ratio codes enable dynamic NEAR stepping.
  always_comb begin
    ratio_is_lossless_or_invalid = 1'b0;
    case (active_ratio)
      RATIO_1_TO_2: ratio_is_lossless_or_invalid = 1'b0;
      RATIO_1_TO_4: ratio_is_lossless_or_invalid = 1'b0;
      RATIO_1_TO_8: ratio_is_lossless_or_invalid = 1'b0;
      default: ratio_is_lossless_or_invalid = 1'b1;
    endcase
  end

  // Compare the registered sums (second pipeline stage).
  always_comb begin
    actual_over_target = 1'b0;
    if (pending_actual_bits_sum > pending_target_bits_sum) begin
      actual_over_target = 1'b1;
    end
  end

  always_comb begin
    actual_under_target = 1'b0;
    if (pending_actual_bits_sum < pending_target_bits_sum) begin
      actual_under_target = 1'b1;
    end
  end

  always_comb begin
    near_is_max = 1'b0;
    if (current_near >= MAX_NEAR_VALUE) begin
      near_is_max = 1'b1;
    end
  end

  // Step up when over budget and below the cap.
  always_comb begin
    near_can_increase = 1'b0;
    if (!pending_ratio_is_lossless_or_invalid && actual_over_target && !near_is_max) begin
      near_can_increase = 1'b1;
    end
  end

  // Step down when under budget and above zero.
  always_comb begin
    near_can_decrease = 1'b0;
    if (!pending_ratio_is_lossless_or_invalid && actual_under_target &&
        current_near != 6'd0) begin
      near_can_decrease = 1'b1;
    end
  end

  // Priority: image start > pending update commit > new strip completion.
  // A strip_done_valid that coincides with either higher-priority event is
  // not captured by this block.
  always_ff @(posedge clk) begin
    if (rst) begin
      active_ratio <= RATIO_LOSSLESS;
      current_near <= 6'd0;
      actual_bits_cumulative <= 48'd0;
      target_bits_cumulative <= 48'd0;
      target_miss_at_max_near <= 1'b0;
      pending_update_valid <= 1'b0;
      pending_actual_bits_sum <= 48'd0;
      pending_target_bits_sum <= 48'd0;
      pending_ratio_is_lossless_or_invalid <= 1'b1;
    end else begin
      if (image_start_valid) begin
        // New image: latch the ratio and clear all accumulated state.
        active_ratio <= image_ratio;
        current_near <= 6'd0;
        actual_bits_cumulative <= 48'd0;
        target_bits_cumulative <= 48'd0;
        target_miss_at_max_near <= 1'b0;
        pending_update_valid <= 1'b0;
        pending_actual_bits_sum <= 48'd0;
        pending_target_bits_sum <= 48'd0;
        pending_ratio_is_lossless_or_invalid <= 1'b1;
      end else if (pending_update_valid) begin
        // Stage 2: commit the sums and step NEAR by at most one.
        actual_bits_cumulative <= pending_actual_bits_sum;
        target_bits_cumulative <= pending_target_bits_sum;
        if (pending_ratio_is_lossless_or_invalid) begin
          current_near <= 6'd0;
        end else if (near_can_increase) begin
          current_near <= current_near + 6'd1;
        end else if (near_can_decrease) begin
          current_near <= current_near - 6'd1;
        end
        if (!pending_ratio_is_lossless_or_invalid && actual_over_target && near_is_max) begin
          target_miss_at_max_near <= 1'b1;
        end
        pending_update_valid <= 1'b0;
      end else if (strip_done_valid) begin
        // Stage 1: register the 48-bit sums and the ratio class.
        pending_update_valid <= 1'b1;
        pending_actual_bits_sum <= actual_bits_sum;
        pending_target_bits_sum <= target_bits_sum;
        pending_ratio_is_lossless_or_invalid <= ratio_is_lossless_or_invalid;
      end
    end
  end
endmodule
`default_nettype wire

View File

@@ -0,0 +1,79 @@
// Standard : Helper for JPEG-LS Annex A.5/A.6/A.7 odd-scale products
// Clause : N/A helper used by multiple Annex arithmetic stages
// Figure : N/A
// Table : N/A
// Pseudocode : product = multiplicand * (2 * NEAR + 1)
// Trace : docs/jls_traceability.md#regular-error-quantization
// Example : multiplicand=5 and near_scale=5 gives 25.
//
// The JPEG-LS NEAR scale is always an odd 6-bit positive value in the range
// 1..63. Vivado tended to map these narrow-scale multiplies into cascaded
// DSP48E1 structures, which put PCOUT->PCIN on the top timing path. This
// helper keeps the operation in carry chains with a fixed three-adder shape:
// one partial sum for bits [2:0], one partial sum for bits [5:3], then a final
// add. The caller provides the surrounding pipeline registers.
`default_nettype none
module jls_near_scale_mul #(
  parameter int INPUT_WIDTH = 33,
  parameter int OUTPUT_WIDTH = 41
) (
  input var logic signed [INPUT_WIDTH-1:0] multiplicand_i,
  input var logic [5:0] near_scale_i,
  output logic signed [OUTPUT_WIDTH-1:0] product_o
);
  // Number of sign bits prepended to reach the product width.
  localparam int SIGN_PAD_BITS = OUTPUT_WIDTH - INPUT_WIDTH;

  // Product-width signed value used for every intermediate term.
  typedef logic signed [OUTPUT_WIDTH-1:0] wide_t;

  // Returns the sum of the weighted terms selected by a 3-bit field:
  // sel[0] picks w1, sel[1] picks w2, sel[2] picks w4. Both halves of the
  // scale use this same table, each with its own set of pre-shifted terms.
  function automatic wide_t pick_weighted_sum(
    input logic [2:0] sel,
    input wide_t w1,
    input wide_t w2,
    input wide_t w4
  );
    case (sel)
      3'b000: return {OUTPUT_WIDTH{1'b0}};
      3'b001: return w1;
      3'b010: return w2;
      3'b011: return w1 + w2;
      3'b100: return w4;
      3'b101: return w1 + w4;
      3'b110: return w2 + w4;
      default: return w1 + w2 + w4;
    endcase
  endfunction

  // Sign-extended multiplicand and its power-of-two multiples.
  wide_t term_x1;
  wide_t term_x2;
  wide_t term_x4;
  wide_t term_x8;
  wide_t term_x16;
  wide_t term_x32;

  // Partial products for scale bits [2:0] and [5:3].
  wide_t low_partial;
  wide_t high_partial;

  always_comb begin
    term_x1 = {{SIGN_PAD_BITS{multiplicand_i[INPUT_WIDTH-1]}}, multiplicand_i};
    term_x2 = term_x1 <<< 1;
    term_x4 = term_x1 <<< 2;
    term_x8 = term_x1 <<< 3;
    term_x16 = term_x1 <<< 4;
    term_x32 = term_x1 <<< 5;
  end

  always_comb begin
    low_partial = pick_weighted_sum(near_scale_i[2:0], term_x1, term_x2, term_x4);
  end

  always_comb begin
    high_partial = pick_weighted_sum(near_scale_i[5:3], term_x8, term_x16, term_x32);
  end

  // Final add of the two partial products.
  always_comb begin
    product_o = low_partial + high_partial;
  end
endmodule
`default_nettype wire

View File

@@ -0,0 +1,485 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.3 context determination, Annex A.4 prediction
// Figure : N/A
// Table : N/A
// Pseudocode : Reconstructed neighborhood selection for Ra/Rb/Rc/Rd
// Trace : docs/jls_traceability.md#med-predictor
// Example : At x=0, Ra and Rb are the first sample from the previous line;
// Rc is the previous line's left-edge extension sample, and Rd
// is the next previous-line sample.
//
// Reconstructed-neighbor provider for one grayscale strip frame. JPEG-LS uses
// encoder-side reconstructed samples as prediction history. For NEAR=0 the
// reconstructed value is exactly the input sample, so this module commits the
// sample to line history immediately and removes the feedback bubble. For
// NEAR>0 it keeps one pixel outstanding until the true reconstructed sample
// returns, preserving near-lossless standard state. The next pixel is accepted
// after Rx is committed; this deliberate timing boundary keeps recon_x/recon_y
// out of the upstream ready path at the 250 MHz target.
`default_nettype none
module jls_neighbor_provider #(
// Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
parameter int PIX_WIDTH = 16,
// Maximum supported runtime image width.
parameter int MAX_PIC_COL = 6144
) (
// Main 250 MHz clock.
input var logic clk,
// Synchronous active-high reset.
input var logic rst,
// Pixel event from jls_scan_ctrl is valid.
input var logic pixel_valid,
// This provider can accept the current source pixel.
output logic pixel_ready,
// Original input sample X.
input var logic [PIX_WIDTH-1:0] pixel_sample,
// Original-image coordinate. A strip starts when strip_first_pixel is high.
input var logic [12:0] pixel_x,
input var logic [12:0] pixel_y,
// High on the last column of the current row. jls_scan_ctrl registers this
// flag with pixel_x/y so the width comparison is not on the line-RAM read
// path for Rd.
input var logic pixel_row_last,
// Strip-local boundary flags.
input var logic strip_first_pixel,
input var logic strip_last_pixel,
// Active-strip fast-commit mode. High only when NEAR==0 for this strip, so
// the provider can write X directly into line history without waiting for
// the later reconstructed-sample return path.
input var logic lossless_fast_mode,
// Pixel/neighborhood event is valid.
output logic neigh_valid,
// Downstream predictor accepted the neighborhood event.
input var logic neigh_ready,
// Forwarded pixel fields.
output logic [PIX_WIDTH-1:0] neigh_sample,
output logic [12:0] neigh_x,
output logic [12:0] neigh_y,
output logic neigh_strip_first_pixel,
output logic neigh_strip_last_pixel,
// Standard reconstructed neighbors.
output logic [PIX_WIDTH-1:0] Ra,
output logic [PIX_WIDTH-1:0] Rb,
output logic [PIX_WIDTH-1:0] Rc,
output logic [PIX_WIDTH-1:0] Rd,
// Reconstructed sample writeback from the later error-quantizer/run stage.
input var logic recon_valid,
// This provider is waiting for the current reconstructed sample.
output logic recon_ready,
// Reconstructed sample Rx and its coordinate.
input var logic [PIX_WIDTH-1:0] recon_sample,
input var logic [12:0] recon_x,
input var logic [12:0] recon_y
);
// Two line banks implement previous/current reconstructed rows. The active
// read bank is the previous row, while the other bank receives this row.
// Both banks are read combinationally at three addresses (Rb, Rc, Rd) and
// written through a single muxed port in the clocked block below.
logic [PIX_WIDTH-1:0] line_bank0 [0:MAX_PIC_COL-1];
logic [PIX_WIDTH-1:0] line_bank1 [0:MAX_PIC_COL-1];
// Read/write bank selector. The write bank is the opposite of read_bank.
logic read_bank;
logic write_bank;
// High while the current row is the first row of a standalone strip frame.
logic top_row_active;
// One outstanding pixel is held until its reconstructed sample returns when
// NEAR>0. The NEAR=0 path does not use this bubble because Rx == X.
// outstanding_x and outstanding_y are recorded on accept but are not read by
// any other logic in this module; only outstanding_row_last is consumed.
logic waiting_reconstruct;
logic [12:0] outstanding_x;
logic [12:0] outstanding_y;
logic outstanding_row_last;
// Left reconstructed neighbor for non-left-edge pixels in the current row.
logic [PIX_WIDTH-1:0] left_Ra;
logic [PIX_WIDTH-1:0] left_Ra_for_pixel;
// JPEG-LS left-edge extension state. CharLS models this with a width+2 line
// buffer where previous_line[0] contains the first reconstructed sample from
// the line before the previous line. For x=0 this value is Rc; it is zero on
// the strip top row and on the row immediately after the strip top row.
logic [PIX_WIDTH-1:0] left_edge_Rc;
logic [PIX_WIDTH-1:0] row_left_Rb;
// Address and boundary decode for the source pixel.
logic pixel_x_is_left_edge;
logic pixel_x_is_right_edge;
logic [12:0] rb_addr;
logic [12:0] rc_addr;
logic [12:0] rd_addr;
logic effective_top_row_active;
// Previous-line samples read from the selected bank.
logic [PIX_WIDTH-1:0] prev_Rb;
logic [PIX_WIDTH-1:0] prev_Rc;
logic [PIX_WIDTH-1:0] prev_Rd;
// Neighborhood values for the accepted pixel.
logic [PIX_WIDTH-1:0] Ra_next;
logic [PIX_WIDTH-1:0] Rb_next;
logic [PIX_WIDTH-1:0] Rc_next;
logic [PIX_WIDTH-1:0] Rd_next;
// Lossless commit path. On the first pixel of a strip, the read bank is
// reset to bank0 and the current row writes to bank1; accept_write_bank makes
// that same-cycle choice explicit rather than using the old read_bank value.
logic accept_pixel_needs_recon;
logic accept_pixel_fast_commit;
logic accept_write_bank;
logic [PIX_WIDTH-1:0] row_left_Rb_next;
logic line_write_valid;
logic line_write_bank;
logic [12:0] line_write_addr;
logic [PIX_WIDTH-1:0] line_write_sample;
// Handshake terms.
logic neigh_slot_open;
logic accept_pixel;
logic accept_recon;
logic accept_recon_write;
logic recon_is_row_last;
logic [12:0] recon_x_plus_one;
logic recon_bypass_not_row_last;
logic recon_bypass_strip_ok;
logic recon_bypass_x_matches;
logic recon_bypass_y_matches;
logic same_row_recon_bypass_ready;
// The current row is always written to the bank that is not being read.
always_comb begin
write_bank = ~read_bank;
end
// strip_first_pixel forces top-row behavior in the same cycle, before the
// top_row_active register has been updated for the new strip.
always_comb begin
effective_top_row_active = top_row_active;
if (strip_first_pixel) begin
effective_top_row_active = 1'b1;
end
end
// Column 0 is the left edge.
always_comb begin
pixel_x_is_left_edge = 1'b0;
if (pixel_x == 13'd0) begin
pixel_x_is_left_edge = 1'b1;
end
end
// The right edge comes from the pre-registered row-last flag rather than a
// width comparison here.
always_comb begin
pixel_x_is_right_edge = pixel_row_last;
end
// Previous-row read addresses: Rb at x, Rc at x-1, Rd at x+1. At the left
// edge rc_addr stays at x (that read is replaced by left_edge_Rc below); at
// the right edge rd_addr stays at x, so Rd reads the same column as Rb.
always_comb begin
rb_addr = pixel_x;
rc_addr = pixel_x;
rd_addr = pixel_x;
if (!pixel_x_is_left_edge) begin
rc_addr = pixel_x - 13'd1;
end
if (!pixel_x_is_right_edge) begin
rd_addr = pixel_x + 13'd1;
end
end
// Combinational read of the previous row. On the strip top row there is no
// previous row, so all three values are zero.
always_comb begin
prev_Rb = {PIX_WIDTH{1'b0}};
prev_Rc = {PIX_WIDTH{1'b0}};
prev_Rd = {PIX_WIDTH{1'b0}};
if (!effective_top_row_active) begin
case (read_bank)
1'b0: begin
prev_Rb = line_bank0[rb_addr];
prev_Rc = line_bank0[rc_addr];
prev_Rd = line_bank0[rd_addr];
end
default: begin
prev_Rb = line_bank1[rb_addr];
prev_Rc = line_bank1[rc_addr];
prev_Rd = line_bank1[rd_addr];
end
endcase
end
end
// Pass-through of the registered left neighbor (no bypass is applied here).
always_comb begin
left_Ra_for_pixel = left_Ra;
end
// Ra is the left neighbor, except at the left edge where it takes Rb.
always_comb begin
Ra_next = left_Ra_for_pixel;
if (pixel_x_is_left_edge) begin
Ra_next = prev_Rb;
end
end
// Rb/Rd come straight from the previous row. Rc does too, except at the left
// edge where it uses the saved left-edge extension value (zero on the top row).
always_comb begin
Rb_next = prev_Rb;
Rc_next = prev_Rc;
Rd_next = prev_Rd;
if (pixel_x_is_left_edge) begin
Rc_next = left_edge_Rc;
if (effective_top_row_active) begin
Rc_next = {PIX_WIDTH{1'b0}};
end
end
end
// A pixel needs the reconstruct round trip unless lossless fast mode is on.
always_comb begin
accept_pixel_needs_recon = 1'b1;
if (lossless_fast_mode) begin
accept_pixel_needs_recon = 1'b0;
end
end
// Fast commit: the accepted pixel is written to line history immediately.
always_comb begin
accept_pixel_fast_commit = 1'b0;
if (accept_pixel && lossless_fast_mode) begin
accept_pixel_fast_commit = 1'b1;
end
end
// Write bank for a fast-committed pixel. The first pixel of a strip always
// targets bank1 because read_bank is being reset to bank0 in the same cycle.
always_comb begin
accept_write_bank = write_bank;
if (strip_first_pixel) begin
accept_write_bank = 1'b1;
end
end
// Next value of the row's left-edge Rb: cleared on a strip's first pixel,
// captured from prev_Rb on any other left-edge pixel, otherwise held.
always_comb begin
row_left_Rb_next = row_left_Rb;
if (strip_first_pixel) begin
row_left_Rb_next = {PIX_WIDTH{1'b0}};
end else if (pixel_x_is_left_edge) begin
row_left_Rb_next = prev_Rb;
end
end
always_comb begin
// One synthesized write port for the current-row line history. Lossless
// fast mode writes X immediately; NEAR>0 writes the returned Rx. The two
// cases are mutually exclusive, but muxing them here keeps Vivado from
// seeing two unrelated write patterns for the same line-bank memories.
// The recon branch is evaluated last, so it would win if both were set.
line_write_valid = 1'b0;
line_write_bank = accept_write_bank;
line_write_addr = pixel_x;
line_write_sample = pixel_sample;
if (accept_pixel_fast_commit) begin
line_write_valid = 1'b1;
end
if (accept_recon_write) begin
line_write_valid = 1'b1;
line_write_bank = write_bank;
line_write_addr = recon_x;
line_write_sample = recon_sample;
end
end
// The output register can take a new event when empty or being drained.
always_comb begin
neigh_slot_open = 1'b0;
if (!neigh_valid || neigh_ready) begin
neigh_slot_open = 1'b1;
end
end
// A new pixel is accepted only when the output slot is open and no
// reconstructed sample is outstanding.
always_comb begin
pixel_ready = 1'b0;
if (neigh_slot_open && !waiting_reconstruct) begin
pixel_ready = 1'b1;
end
end
always_comb begin
accept_pixel = 1'b0;
if (pixel_valid && pixel_ready) begin
accept_pixel = 1'b1;
end
end
// recon_ready is high while a sample is outstanding. In lossless fast mode
// with nothing outstanding it is also high, so returned samples are accepted;
// accept_recon_write stays low in that case and nothing is written.
always_comb begin
recon_ready = waiting_reconstruct;
if (lossless_fast_mode && !waiting_reconstruct) begin
recon_ready = 1'b1;
end
end
always_comb begin
accept_recon = 1'b0;
if (recon_valid && recon_ready) begin
accept_recon = 1'b1;
end
end
// Only a recon handshake for an outstanding pixel updates line history.
always_comb begin
accept_recon_write = 1'b0;
if (accept_recon && waiting_reconstruct) begin
accept_recon_write = 1'b1;
end
end
// Row-last status of the returning sample comes from the value latched when
// its pixel was accepted, not from recon_x.
always_comb begin
recon_is_row_last = outstanding_row_last;
end
always_comb begin
recon_x_plus_one = recon_x + 13'd1;
end
always_comb begin
recon_bypass_not_row_last = 1'b0;
if (!recon_is_row_last) begin
recon_bypass_not_row_last = 1'b1;
end
end
always_comb begin
recon_bypass_strip_ok = 1'b0;
if (!strip_first_pixel) begin
recon_bypass_strip_ok = 1'b1;
end
end
always_comb begin
recon_bypass_x_matches = 1'b0;
if (pixel_x == recon_x_plus_one) begin
recon_bypass_x_matches = 1'b1;
end
end
always_comb begin
recon_bypass_y_matches = 1'b0;
if (pixel_y == recon_y) begin
recon_bypass_y_matches = 1'b1;
end
end
always_comb begin
// Diagnostic decode for the previous same-row bypass condition. The
// timing path now waits one clock after Rx writeback instead of using this
// condition in pixel_ready. The result is not read by any other logic in
// this module.
same_row_recon_bypass_ready = 1'b0;
if (accept_recon_write && recon_bypass_not_row_last && recon_bypass_strip_ok &&
recon_bypass_x_matches && recon_bypass_y_matches) begin
same_row_recon_bypass_ready = 1'b1;
end
end
// State update. Several branches below assign the same registers; the
// assignment that appears last in the block wins, so statement order is part
// of the behavior and must be preserved.
always_ff @(posedge clk) begin
if (rst) begin
read_bank <= 1'b0;
top_row_active <= 1'b1;
waiting_reconstruct <= 1'b0;
outstanding_x <= 13'd0;
outstanding_y <= 13'd0;
outstanding_row_last <= 1'b0;
left_Ra <= {PIX_WIDTH{1'b0}};
left_edge_Rc <= {PIX_WIDTH{1'b0}};
row_left_Rb <= {PIX_WIDTH{1'b0}};
neigh_valid <= 1'b0;
neigh_sample <= {PIX_WIDTH{1'b0}};
neigh_x <= 13'd0;
neigh_y <= 13'd0;
neigh_strip_first_pixel <= 1'b0;
neigh_strip_last_pixel <= 1'b0;
Ra <= {PIX_WIDTH{1'b0}};
Rb <= {PIX_WIDTH{1'b0}};
Rc <= {PIX_WIDTH{1'b0}};
Rd <= {PIX_WIDTH{1'b0}};
end else begin
// Output slot drains when accepted downstream and not refilled this cycle.
if (neigh_valid && neigh_ready && !accept_pixel) begin
neigh_valid <= 1'b0;
end
if (accept_pixel) begin
// Register the pixel and its neighborhood for the predictor.
neigh_valid <= 1'b1;
neigh_sample <= pixel_sample;
neigh_x <= pixel_x;
neigh_y <= pixel_y;
neigh_strip_first_pixel <= strip_first_pixel;
neigh_strip_last_pixel <= strip_last_pixel;
Ra <= Ra_next;
Rb <= Rb_next;
Rc <= Rc_next;
Rd <= Rd_next;
// Start waiting for Rx unless this strip is in lossless fast mode.
waiting_reconstruct <= accept_pixel_needs_recon;
outstanding_x <= pixel_x;
outstanding_y <= pixel_y;
outstanding_row_last <= pixel_x_is_right_edge;
if (strip_first_pixel) begin
// New strip: restart on the top row with bank0 as the read bank.
top_row_active <= 1'b1;
read_bank <= 1'b0;
left_Ra <= {PIX_WIDTH{1'b0}};
left_edge_Rc <= {PIX_WIDTH{1'b0}};
row_left_Rb <= {PIX_WIDTH{1'b0}};
end else if (pixel_x_is_left_edge) begin
row_left_Rb <= prev_Rb;
end
if (accept_pixel_fast_commit) begin
// Lossless: X is the reconstructed value, so it becomes the next Ra.
// These assignments override the strip-first values set just above.
left_Ra <= pixel_sample;
row_left_Rb <= row_left_Rb_next;
if (pixel_x_is_right_edge) begin
// End of row: the row just written becomes the previous row.
read_bank <= accept_write_bank;
left_Ra <= {PIX_WIDTH{1'b0}};
left_edge_Rc <= row_left_Rb_next;
top_row_active <= 1'b0;
end
end
end
if (accept_recon_write) begin
// NEAR>0: the returned Rx becomes the next Ra.
left_Ra <= recon_sample;
// pixel_ready requires !waiting_reconstruct while accept_recon_write
// requires waiting_reconstruct, so accept_pixel cannot be high here and
// this guard is always taken.
if (!accept_pixel) begin
waiting_reconstruct <= 1'b0;
end
if (recon_is_row_last) begin
// End of row: the row just written becomes the previous row.
read_bank <= write_bank;
left_Ra <= {PIX_WIDTH{1'b0}};
left_edge_Rc <= row_left_Rb;
top_row_active <= 1'b0;
end
end
// Single write port into the selected line bank.
if (line_write_valid) begin
case (line_write_bank)
1'b0: begin
line_bank0[line_write_addr] <= line_write_sample;
end
default: begin
line_bank1[line_write_addr] <= line_write_sample;
end
endcase
end
end
end
endmodule
`default_nettype wire

View File

@@ -0,0 +1,218 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex C.1-C.4 marker stream byte order
// Figure : N/A
// Table : N/A
// Pseudocode : Encoded byte stream delivery after JPEG-LS bit packing
// Trace : docs/jls_traceability.md#jls-output-buffer
// Example : A byte event {start=1, byte=8'hFF} becomes ofifo_wdata=9'h1FF.
//
// Internal output buffer for the 9-bit output FIFO interface. The external
// ofifo_full/ofifo_alfull inputs are intentionally ignored by RTL behavior per
// the SRS; simulation reports an error if a write happens while ofifo_full=1.
`default_nettype none
module jls_output_buffer #(
  // Internal output-buffer capacity in bytes. Default comes from the SRS.
  parameter int OUT_BUF_BYTES = 8192,
  // Input-pause margin in bytes. pause_req asserts when occupancy reaches
  // OUT_BUF_BYTES - OUT_BUF_AFULL_MARGIN. Must lie in 0..OUT_BUF_BYTES.
  parameter int OUT_BUF_AFULL_MARGIN = 256
) (
  // Main 250 MHz clock.
  input var logic clk,
  // Synchronous active-high reset.
  input var logic rst,
  // Encoded byte event from header writer or bit packer.
  input var logic byte_valid,
  // This buffer can accept the encoded byte event.
  output logic byte_ready,
  // JPEG-LS byte in marker-stream order.
  input var logic [7:0] byte_data,
  // Sideband copied to ofifo_wdata[8] for the original-image first byte only.
  input var logic original_image_start,
  // Accepted byte event pulse for statistics and dynamic NEAR accounting.
  output logic byte_accepted,
  // Internal pause request for upstream pipeline throttling.
  output logic pause_req,
  // Current buffer occupancy in bytes, used by verification reports.
  output logic [$clog2(OUT_BUF_BYTES + 1)-1:0] buffer_level,
  // Output FIFO write clock, same frequency and phase as clk.
  output logic ofifo_wclk,
  // Output FIFO write enable.
  output logic ofifo_wr,
  // Output FIFO data. Bit 8 marks original-image start; bits 7:0 carry bytes.
  output logic [8:0] ofifo_wdata,
  // Reserved output FIFO full flag. RTL ignores this input for flow control;
  // it is only observed by the simulation-only check at the end of the module.
  input var logic ofifo_full,
  // Reserved output FIFO almost-full flag. RTL ignores this input.
  input var logic ofifo_alfull
);
  // Pointer width for the circular byte buffer.
  localparam int PTR_WIDTH = $clog2(OUT_BUF_BYTES);
  // Occupancy counter width. It must represent OUT_BUF_BYTES exactly.
  localparam int COUNT_WIDTH = $clog2(OUT_BUF_BYTES + 1);
  // Last legal circular-buffer pointer value.
  localparam logic [PTR_WIDTH-1:0] PTR_LAST_VALUE = OUT_BUF_BYTES - 1;
  // Buffer capacity and near-full threshold as sized constants.
  localparam logic [COUNT_WIDTH-1:0] OUT_BUF_BYTES_VALUE = OUT_BUF_BYTES;
  localparam logic [COUNT_WIDTH-1:0] AFULL_MARGIN_VALUE = OUT_BUF_AFULL_MARGIN;
  localparam logic [COUNT_WIDTH-1:0] PAUSE_LEVEL_VALUE =
      OUT_BUF_BYTES - OUT_BUF_AFULL_MARGIN;

  // Circular storage. Bit 8 is original_image_start, bits 7:0 are stream byte.
  logic [8:0] buffer_mem [0:OUT_BUF_BYTES-1];

  // Circular write and read pointers.
  logic [PTR_WIDTH-1:0] write_ptr;
  logic [PTR_WIDTH-1:0] read_ptr;
  logic [PTR_WIDTH-1:0] write_ptr_next;
  logic [PTR_WIDTH-1:0] read_ptr_next;

  // Occupancy and status flags.
  logic [COUNT_WIDTH-1:0] occupancy_count;
  logic buffer_empty;
  logic buffer_full;
  logic push_byte;
  logic pop_byte;

  // Packed byte event stored in the internal buffer.
  logic [8:0] buffer_write_word;

  // Reserved input observation signal keeps intent explicit without changing
  // flow control behavior.
  logic ofifo_alfull_ignored;

  assign ofifo_wclk = clk;

  always_comb begin
    buffer_level = occupancy_count;
  end

  always_comb begin
    buffer_write_word = {original_image_start, byte_data};
  end

  always_comb begin
    buffer_empty = 1'b0;
    if (occupancy_count == {COUNT_WIDTH{1'b0}}) begin
      buffer_empty = 1'b1;
    end
  end

  always_comb begin
    buffer_full = 1'b0;
    if (occupancy_count == OUT_BUF_BYTES_VALUE) begin
      buffer_full = 1'b1;
    end
  end

  // Upstream may push whenever the buffer is not completely full. A pop in
  // the same cycle does not open a slot while the buffer reads as full.
  always_comb begin
    byte_ready = 1'b0;
    if (!buffer_full) begin
      byte_ready = 1'b1;
    end
  end

  always_comb begin
    byte_accepted = 1'b0;
    if (byte_valid && byte_ready) begin
      byte_accepted = 1'b1;
    end
  end

  // The buffer drains one byte every cycle it is non-empty; the external FIFO
  // flags do not gate this.
  always_comb begin
    pop_byte = 1'b0;
    if (!buffer_empty) begin
      pop_byte = 1'b1;
    end
  end

  always_comb begin
    push_byte = byte_accepted;
  end

  always_comb begin
    pause_req = 1'b0;
    if (occupancy_count >= PAUSE_LEVEL_VALUE) begin
      pause_req = 1'b1;
    end
  end

  // Pointers wrap at OUT_BUF_BYTES-1 so non-power-of-two depths work.
  always_comb begin
    write_ptr_next = write_ptr + {{(PTR_WIDTH-1){1'b0}}, 1'b1};
    if (write_ptr == PTR_LAST_VALUE) begin
      write_ptr_next = {PTR_WIDTH{1'b0}};
    end
  end

  always_comb begin
    read_ptr_next = read_ptr + {{(PTR_WIDTH-1){1'b0}}, 1'b1};
    if (read_ptr == PTR_LAST_VALUE) begin
      read_ptr_next = {PTR_WIDTH{1'b0}};
    end
  end

  always_comb begin
    ofifo_alfull_ignored = ofifo_alfull;
  end

  always_ff @(posedge clk) begin
    if (rst) begin
      write_ptr <= {PTR_WIDTH{1'b0}};
      read_ptr <= {PTR_WIDTH{1'b0}};
      occupancy_count <= {COUNT_WIDTH{1'b0}};
      ofifo_wr <= 1'b0;
      ofifo_wdata <= 9'd0;
    end else begin
      // Registered read: the popped word appears on ofifo_wdata together with
      // ofifo_wr one cycle after pop_byte; idle cycles drive zero.
      ofifo_wr <= pop_byte;
      if (pop_byte) begin
        ofifo_wdata <= buffer_mem[read_ptr];
        read_ptr <= read_ptr_next;
      end else begin
        ofifo_wdata <= 9'd0;
      end
      if (push_byte) begin
        buffer_mem[write_ptr] <= buffer_write_word;
        write_ptr <= write_ptr_next;
      end
      // Occupancy changes only when exactly one of push/pop happens.
      case ({push_byte, pop_byte})
        2'b10: begin
          occupancy_count <= occupancy_count + {{(COUNT_WIDTH-1){1'b0}}, 1'b1};
        end
        2'b01: begin
          occupancy_count <= occupancy_count - {{(COUNT_WIDTH-1){1'b0}}, 1'b1};
        end
        default: begin
          occupancy_count <= occupancy_count;
        end
      endcase
    end
  end

`ifndef SYNTHESIS
  // Simulation-only checks; they do not affect synthesized behavior.
  // A margin outside 0..OUT_BUF_BYTES would wrap PAUSE_LEVEL_VALUE.
  initial begin
    if (OUT_BUF_AFULL_MARGIN < 0 || OUT_BUF_AFULL_MARGIN > OUT_BUF_BYTES) begin
      $error("jls_output_buffer: OUT_BUF_AFULL_MARGIN=%0d is outside 0..OUT_BUF_BYTES=%0d",
             OUT_BUF_AFULL_MARGIN, OUT_BUF_BYTES);
    end
  end

  // Report a write presented to the output FIFO while it signals full. Flow
  // control still ignores ofifo_full; this only flags the event.
  always @(posedge clk) begin
    if (!rst && ofifo_wr && ofifo_full) begin
      $error("jls_output_buffer: ofifo_wr asserted while ofifo_full=1");
    end
  end
`endif
endmodule
`default_nettype wire

View File

@@ -0,0 +1,196 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.5 prediction error encoding, Annex A.6 bias variables
// Figure : N/A
// Table : N/A
// Pseudocode : Px correction by C[Q] followed by bounds correction
// Trace : docs/jls_traceability.md#regular-prediction-correction
// Example : Px=20,C=-3,negative_context=0 gives corrected_Px=17.
//
// Registered prediction correction stage. It applies context sign to C[Q],
// adds the result to the MED prediction Px, and clamps the prediction to
// 0..MAXVAL like the JPEG-LS correct_prediction operation.
`default_nettype none
module jls_prediction_corrector #(
  // Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16
) (
  // Main 250 MHz clock.
  input var logic clk,
  // Synchronous active-high reset.
  input var logic rst,
  // Input context event is valid.
  input var logic context_valid,
  // This stage can accept the current context event.
  output logic context_ready,
  // Forwarded original input sample X.
  input var logic [PIX_WIDTH-1:0] context_sample,
  // Forwarded pixel coordinate.
  input var logic [12:0] context_x,
  // Forwarded pixel coordinate.
  input var logic [12:0] context_y,
  // Forwarded strip boundary flags.
  input var logic context_strip_first_pixel,
  input var logic context_strip_last_pixel,
  // JPEG-LS MED prediction value Px.
  input var logic [PIX_WIDTH-1:0] Px,
  // Context variables A, B, N are forwarded unchanged. C is the standard bias
  // correction variable C[Q], range -128..127, and is also applied to Px.
  input var logic [31:0] A,
  input var logic signed [31:0] B,
  input var logic signed [8:0] C,
  input var logic [15:0] N,
  // High when the quantized context sign is negative.
  input var logic context_negative,
  // Context index and run-mode flag are forwarded for later stages.
  input var logic [8:0] context_index,
  input var logic run_mode_context,
  // Corrected prediction event is valid.
  output logic corrected_valid,
  // Downstream regular/run-mode stage accepted this event.
  input var logic corrected_ready,
  // Forwarded original input sample X.
  output logic [PIX_WIDTH-1:0] corrected_sample,
  // Forwarded pixel coordinate.
  output logic [12:0] corrected_x,
  // Forwarded pixel coordinate.
  output logic [12:0] corrected_y,
  // Forwarded strip boundary flags.
  output logic corrected_strip_first_pixel,
  output logic corrected_strip_last_pixel,
  // Corrected and clamped prediction value.
  output logic [PIX_WIDTH-1:0] corrected_Px,
  // Forwarded context metadata.
  output logic [8:0] corrected_context_index,
  output logic corrected_context_negative,
  output logic corrected_run_mode_context,
  // Forwarded pre-update context variables for jls_context_update.
  output logic [31:0] corrected_A,
  output logic signed [31:0] corrected_B,
  output logic signed [8:0] corrected_C,
  output logic [15:0] corrected_N
);
  // Upper clamp bound MAXVAL = 2^PIX_WIDTH - 1 in the 19-bit signed working
  // width, which covers a 16-bit Px plus the C[Q] range.
  localparam logic signed [18:0] PX_CEILING = (19'sd1 <<< PIX_WIDTH) - 19'sd1;

  // Datapath intermediates.
  logic signed [18:0] bias_wide;
  logic signed [18:0] bias_applied;
  logic signed [18:0] px_wide;
  logic signed [18:0] px_biased;
  logic [PIX_WIDTH-1:0] px_clamped;

  // Handshake terms.
  logic out_slot_free;
  logic take_event;

  // Px correction: add C when the context sign is positive, subtract it when
  // negative, then clamp the result into 0..MAXVAL.
  always_comb begin
    // C is signed, so widening it sign-extends.
    bias_wide = C;
    bias_applied = bias_wide;
    if (context_negative) begin
      bias_applied = -bias_wide;
    end
    // Px is unsigned, so widening it zero-extends.
    px_wide = 19'(Px);
    px_biased = px_wide + bias_applied;
    if (px_biased > PX_CEILING) begin
      px_clamped = PX_CEILING[PIX_WIDTH-1:0];
    end else if (px_biased < 19'sd0) begin
      px_clamped = {PIX_WIDTH{1'b0}};
    end else begin
      px_clamped = px_biased[PIX_WIDTH-1:0];
    end
  end

  // The single output register accepts a new event when it is empty or when
  // the held event is being taken downstream this cycle.
  always_comb begin
    out_slot_free = 1'b0;
    if (corrected_ready || !corrected_valid) begin
      out_slot_free = 1'b1;
    end
    context_ready = out_slot_free;
    take_event = 1'b0;
    if (out_slot_free && context_valid) begin
      take_event = 1'b1;
    end
  end

  always_ff @(posedge clk) begin
    if (rst) begin
      corrected_valid <= 1'b0;
      corrected_sample <= {PIX_WIDTH{1'b0}};
      corrected_x <= 13'd0;
      corrected_y <= 13'd0;
      corrected_strip_first_pixel <= 1'b0;
      corrected_strip_last_pixel <= 1'b0;
      corrected_Px <= {PIX_WIDTH{1'b0}};
      corrected_context_index <= 9'd0;
      corrected_context_negative <= 1'b0;
      corrected_run_mode_context <= 1'b0;
      corrected_A <= 32'd0;
      corrected_B <= 32'sd0;
      corrected_C <= 9'sd0;
      corrected_N <= 16'd0;
    end else if (take_event) begin
      // Load the output register with the corrected prediction and the
      // forwarded fields.
      corrected_valid <= 1'b1;
      corrected_Px <= px_clamped;
      corrected_sample <= context_sample;
      corrected_x <= context_x;
      corrected_y <= context_y;
      corrected_strip_first_pixel <= context_strip_first_pixel;
      corrected_strip_last_pixel <= context_strip_last_pixel;
      corrected_context_index <= context_index;
      corrected_context_negative <= context_negative;
      corrected_run_mode_context <= run_mode_context;
      corrected_A <= A;
      corrected_B <= B;
      corrected_C <= C;
      corrected_N <= N;
    end else if (corrected_valid && corrected_ready) begin
      // Held event consumed with no replacement: the slot becomes empty.
      corrected_valid <= 1'b0;
    end
  end
endmodule
`default_nettype wire

View File

@@ -0,0 +1,273 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.4 prediction, Annex G.1 regular-mode variables
// Figure : N/A
// Table : N/A
// Pseudocode : MED predictor / Px calculation from Ra, Rb, and Rc
// Trace : docs/jls_traceability.md#med-predictor
// Example : If Ra=10, Rb=20, Rc=15, Px=Ra+Rb-Rc=15.
//
// Registered MED predictor stage. A separate line-buffer stage supplies the
// reconstructed neighbors Ra/Rb/Rc/Rd. This split keeps the neighbor memory
// path independent from the MED compare/add path for the 250 MHz target.
`default_nettype none
module jls_predictor #(
  // Sample precision in bits, fixed at elaboration. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16
) (
  // Main 250 MHz clock.
  input var logic clk,
  // Synchronous active-high reset.
  input var logic rst,
  // Upstream handshake: an input pixel/neighborhood event is offered.
  input var logic pixel_valid,
  // Upstream handshake: this stage takes the offered event this cycle.
  output logic pixel_ready,
  // Original input sample X.
  input var logic [PIX_WIDTH-1:0] pixel_sample,
  // Zero-based column coordinate of X.
  input var logic [12:0] pixel_x,
  // Zero-based row coordinate of X.
  input var logic [12:0] pixel_y,
  // Marks the first pixel of the current strip frame.
  input var logic strip_first_pixel,
  // Marks the last pixel of the current strip frame.
  input var logic strip_last_pixel,
  // Reconstructed neighbor left of X.
  input var logic [PIX_WIDTH-1:0] Ra,
  // Reconstructed neighbor above X.
  input var logic [PIX_WIDTH-1:0] Rb,
  // Reconstructed neighbor above-left of X.
  input var logic [PIX_WIDTH-1:0] Rc,
  // Reconstructed neighbor above-right of X. Not used by MED; forwarded only.
  input var logic [PIX_WIDTH-1:0] Rd,
  // Downstream handshake: a predicted event is available on predict_*/Px.
  output logic predict_valid,
  // Downstream handshake: the consumer takes the current predicted event.
  input var logic predict_ready,
  // Forwarded copy of X.
  output logic [PIX_WIDTH-1:0] predict_sample,
  // Forwarded column coordinate.
  output logic [12:0] predict_x,
  // Forwarded row coordinate.
  output logic [12:0] predict_y,
  // Forwarded first-pixel flag.
  output logic predict_strip_first_pixel,
  // Forwarded last-pixel flag.
  output logic predict_strip_last_pixel,
  // Forwarded reconstructed neighbors.
  output logic [PIX_WIDTH-1:0] predict_Ra,
  output logic [PIX_WIDTH-1:0] predict_Rb,
  output logic [PIX_WIDTH-1:0] predict_Rc,
  output logic [PIX_WIDTH-1:0] predict_Rd,
  // MED prediction value Px for the forwarded event.
  output logic [PIX_WIDTH-1:0] Px
);

  // ------------------------------------------------------------------
  // MED datapath (combinational). Neighbors are zero-extended by one
  // guard bit so Ra+Rb-Rc is evaluated without overflow.
  // ------------------------------------------------------------------
  logic [PIX_WIDTH:0]   ra_ext;
  logic [PIX_WIDTH:0]   rb_ext;
  logic [PIX_WIDTH:0]   rc_ext;
  logic [PIX_WIDTH:0]   neighbor_min_ext;
  logic [PIX_WIDTH:0]   neighbor_max_ext;
  logic [PIX_WIDTH:0]   med_sum_ext;
  logic [PIX_WIDTH-1:0] med_selected;
  logic                 ra_ge_rb;
  logic                 rc_ge_neighbor_max;
  logic                 rc_le_neighbor_min;

  // Handshake terms.
  logic output_queue_full;
  logic accept_pixel;
  logic output_accept;

  // Overflow slot behind the public predict_* registers. Together they form
  // a two-entry in-order queue, so pixel_ready depends only on local state.
  logic                 predict_next_valid;
  logic [PIX_WIDTH-1:0] predict_next_sample;
  logic [12:0]          predict_next_x;
  logic [12:0]          predict_next_y;
  logic                 predict_next_strip_first_pixel;
  logic                 predict_next_strip_last_pixel;
  logic [PIX_WIDTH-1:0] predict_next_Ra;
  logic [PIX_WIDTH-1:0] predict_next_Rb;
  logic [PIX_WIDTH-1:0] predict_next_Rc;
  logic [PIX_WIDTH-1:0] predict_next_Rd;
  logic [PIX_WIDTH-1:0] predict_next_Px;

  assign ra_ext = {1'b0, Ra};
  assign rb_ext = {1'b0, Rb};
  assign rc_ext = {1'b0, Rc};

  // min(Ra, Rb) / max(Ra, Rb).
  assign ra_ge_rb         = (Ra >= Rb);
  assign neighbor_min_ext = ra_ge_rb ? rb_ext : ra_ext;
  assign neighbor_max_ext = ra_ge_rb ? ra_ext : rb_ext;

  // The two edge-detecting comparisons of the MED predictor.
  assign rc_ge_neighbor_max = (rc_ext >= neighbor_max_ext);
  assign rc_le_neighbor_min = (rc_ext <= neighbor_min_ext);

  // Planar estimate used when Rc lies strictly between the two neighbors.
  assign med_sum_ext = ra_ext + rb_ext - rc_ext;

  // Px = min if Rc >= max, max if Rc <= min, otherwise Ra + Rb - Rc.
  always_comb begin
    if (rc_ge_neighbor_max) begin
      med_selected = neighbor_min_ext[PIX_WIDTH-1:0];
    end else if (rc_le_neighbor_min) begin
      med_selected = neighbor_max_ext[PIX_WIDTH-1:0];
    end else begin
      med_selected = med_sum_ext[PIX_WIDTH-1:0];
    end
  end

  // ------------------------------------------------------------------
  // Handshake.
  // ------------------------------------------------------------------
  assign output_queue_full = predict_valid && predict_next_valid;
  assign pixel_ready       = !output_queue_full;
  assign accept_pixel      = pixel_valid && pixel_ready;
  assign output_accept     = predict_valid && predict_ready;

  // ------------------------------------------------------------------
  // Two-entry output queue.
  // ------------------------------------------------------------------
  always_ff @(posedge clk) begin
    if (rst) begin
      predict_valid                  <= 1'b0;
      predict_next_valid             <= 1'b0;
      predict_sample                 <= '0;
      predict_x                      <= '0;
      predict_y                      <= '0;
      predict_strip_first_pixel      <= 1'b0;
      predict_strip_last_pixel       <= 1'b0;
      predict_Ra                     <= '0;
      predict_Rb                     <= '0;
      predict_Rc                     <= '0;
      predict_Rd                     <= '0;
      Px                             <= '0;
      predict_next_sample            <= '0;
      predict_next_x                 <= '0;
      predict_next_y                 <= '0;
      predict_next_strip_first_pixel <= 1'b0;
      predict_next_strip_last_pixel  <= 1'b0;
      predict_next_Ra                <= '0;
      predict_next_Rb                <= '0;
      predict_next_Rc                <= '0;
      predict_next_Rd                <= '0;
      predict_next_Px                <= '0;
    end else if (output_accept && predict_next_valid) begin
      // Head consumed while the overflow slot is occupied: promote it.
      // The queue was full this cycle, so no new pixel can arrive here.
      predict_valid             <= 1'b1;
      predict_next_valid        <= 1'b0;
      predict_sample            <= predict_next_sample;
      predict_x                 <= predict_next_x;
      predict_y                 <= predict_next_y;
      predict_strip_first_pixel <= predict_next_strip_first_pixel;
      predict_strip_last_pixel  <= predict_next_strip_last_pixel;
      predict_Ra                <= predict_next_Ra;
      predict_Rb                <= predict_next_Rb;
      predict_Rc                <= predict_next_Rc;
      predict_Rd                <= predict_next_Rd;
      Px                        <= predict_next_Px;
    end else if (accept_pixel) begin
      if (predict_valid && !output_accept) begin
        // Head is occupied and stalled: park the new event behind it.
        predict_next_valid             <= 1'b1;
        predict_next_sample            <= pixel_sample;
        predict_next_x                 <= pixel_x;
        predict_next_y                 <= pixel_y;
        predict_next_strip_first_pixel <= strip_first_pixel;
        predict_next_strip_last_pixel  <= strip_last_pixel;
        predict_next_Ra                <= Ra;
        predict_next_Rb                <= Rb;
        predict_next_Rc                <= Rc;
        predict_next_Rd                <= Rd;
        predict_next_Px                <= med_selected;
      end else begin
        // Head is empty or being consumed this cycle: load it directly.
        predict_valid             <= 1'b1;
        predict_sample            <= pixel_sample;
        predict_x                 <= pixel_x;
        predict_y                 <= pixel_y;
        predict_strip_first_pixel <= strip_first_pixel;
        predict_strip_last_pixel  <= strip_last_pixel;
        predict_Ra                <= Ra;
        predict_Rb                <= Rb;
        predict_Rc                <= Rc;
        predict_Rd                <= Rd;
        Px                        <= med_selected;
      end
    end else if (output_accept) begin
      // Head consumed with nothing queued and nothing arriving: go empty.
      predict_valid <= 1'b0;
    end
  end
endmodule
`default_nettype wire

View File

@@ -0,0 +1,138 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex C.2.4.1.1 preset coding parameters
// Figure : C.3 clamping function, referenced by default threshold rules
// Table : Table C.1 valid preset parameters, Table C.2 RESET, Table C.3 defaults
// Pseudocode : Default threshold calculation for MAXVAL >= 128
// Trace : docs/jls_traceability.md#jls-preset-defaults
// Example : PIX_WIDTH=8, NEAR=0 gives MAXVAL=255, T1=3, T2=7, T3=21.
//
// JPEG-LS default preset coding parameter helper. The first RTL version only
// supports 8/10/12/14/16-bit grayscale samples and NEAR is clamped to 0..31.
// For all supported sample precisions MAXVAL >= 128. With NEAR <= 31 the
// default thresholds do not hit MAXVAL, so the standard C.2.4.1.1 equations
// reduce to shallow shift-add expressions:
// T1 = FACTOR * 1 + 2 + 3*NEAR
// T2 = FACTOR * 4 + 3 + 5*NEAR
// T3 = FACTOR * 17 + 4 + 7*NEAR
`default_nettype none
module jls_preset_defaults #(
  // Sample precision in bits, fixed at elaboration. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16
) (
  // Requested NEAR. Anything above 31 is treated as 31.
  input var logic [5:0] near,
  // LSE preset parameter MAXVAL.
  output logic [15:0] preset_maxval,
  // LSE preset parameter T1.
  output logic [15:0] preset_t1,
  // LSE preset parameter T2.
  output logic [15:0] preset_t2,
  // LSE preset parameter T3.
  output logic [15:0] preset_t3,
  // LSE preset parameter RESET.
  output logic [15:0] preset_reset
);

  // RESET default (T.87 Table C.2).
  localparam logic [15:0] RESET_DEFAULT = 16'd64;

  // Per-precision constants, resolved at elaboration. Any PIX_WIDTH other
  // than 8/10/12/14 falls back to the 16-bit values, exactly like the
  // 16-bit entry itself. The NEAR-independent threshold parts are
  // FACTOR*1+2, FACTOR*4+3 and FACTOR*17+4 with FACTOR = 1, 4 or 16.
  localparam logic [15:0] MAXVAL_BY_WIDTH =
      (PIX_WIDTH == 8)  ? 16'd255   :
      (PIX_WIDTH == 10) ? 16'd1023  :
      (PIX_WIDTH == 12) ? 16'd4095  :
      (PIX_WIDTH == 14) ? 16'd16383 :
                          16'hFFFF;
  localparam logic [15:0] T1_BASE =
      (PIX_WIDTH == 8)  ? 16'd3 :
      (PIX_WIDTH == 10) ? 16'd6 :
                          16'd18;
  localparam logic [15:0] T2_BASE =
      (PIX_WIDTH == 8)  ? 16'd7  :
      (PIX_WIDTH == 10) ? 16'd19 :
                          16'd67;
  localparam logic [15:0] T3_BASE =
      (PIX_WIDTH == 8)  ? 16'd21 :
      (PIX_WIDTH == 10) ? 16'd72 :
                          16'd276;

  // NEAR limited to the project maximum, then widened to the output width.
  logic [5:0]  near_clamped;
  logic [15:0] near_ext;
  // Shift-add multiples of NEAR.
  logic [15:0] near_times_3;
  logic [15:0] near_times_5;
  logic [15:0] near_times_7;

  assign near_clamped = (near > 6'd31) ? 6'd31 : near;
  assign near_ext     = {10'd0, near_clamped};

  assign near_times_3 = (near_ext << 1) + near_ext;
  assign near_times_5 = (near_ext << 2) + near_ext;
  assign near_times_7 = (near_ext << 2) + (near_ext << 1) + near_ext;

  assign preset_maxval = MAXVAL_BY_WIDTH;
  assign preset_t1     = T1_BASE + near_times_3;
  assign preset_t2     = T2_BASE + near_times_5;
  assign preset_t3     = T3_BASE + near_times_7;
  assign preset_reset  = RESET_DEFAULT;
endmodule
`default_nettype wire

View File

@@ -0,0 +1,805 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.5 prediction error encoding, Annex A.2 RANGE
// Figure : N/A
// Table : N/A
// Pseudocode : Errval quantization/modulo and reconstructed sample computation
// Trace : docs/jls_traceability.md#regular-error-quantization
// Example : X=24, Px=20, NEAR=1 gives Errval=1 and Rx=23.
//
// Regular-mode error quantizer and reconstructed-sample calculator. NEAR>0
// uses an exact reciprocal-LUT multiply and correction pipeline: one cycle for
// the reciprocal multiply, one cycle to register the q*d check product, and
// one cycle for the quotient correction, followed by the registered Annex A.5
// modulo and reconstruction stages. This avoids a large combinational divider
// while reducing the earlier one-bit-per-cycle latency.
`default_nettype none
module jls_regular_error_quantizer #(
// Purpose: multi-cycle FSM that latches one corrected-prediction event while in
// STATE_IDLE, computes the quantized, modulo-reduced Errval and the reconstructed
// sample Rx over a fixed sequence of registered stages, and presents the result
// on the err_* outputs in STATE_FINISH. Only one event is in flight at a time.
// Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
parameter int PIX_WIDTH = 16
) (
// Main 250 MHz clock.
input var logic clk,
// Synchronous active-high reset.
input var logic rst,
// Corrected prediction event is valid.
input var logic corrected_valid,
// This stage can accept the current event. High only while state == STATE_IDLE.
output logic corrected_ready,
// Original input sample X.
input var logic [PIX_WIDTH-1:0] corrected_sample,
// Pixel coordinate forwarded for line-buffer writeback/reporting.
input var logic [12:0] corrected_x,
input var logic [12:0] corrected_y,
// Strip boundary flags forwarded with the result.
input var logic corrected_strip_first_pixel,
input var logic corrected_strip_last_pixel,
// Corrected prediction value after C[Q] and bounds correction.
input var logic [PIX_WIDTH-1:0] corrected_Px,
// Forwarded context metadata.
input var logic [8:0] corrected_context_index,
input var logic corrected_context_negative,
input var logic corrected_run_mode_context,
// Pre-update context variables forwarded from jls_context_model.
input var logic [31:0] corrected_A,
input var logic signed [31:0] corrected_B,
input var logic signed [8:0] corrected_C,
input var logic [15:0] corrected_N,
// Coding parameters for the current strip frame. Sampled together with the
// event when it is accepted in STATE_IDLE.
input var logic [16:0] RANGE,
input var logic [4:0] qbpp,
input var logic [6:0] LIMIT,
// NOTE(review): the divider path below uses only NEAR[4:0], so it assumes
// NEAR <= 31 — confirm that the driver clamps NEAR.
input var logic [5:0] NEAR,
// Quantized error event is valid.
output logic err_valid,
// Downstream context-update stage accepted the event.
input var logic err_ready,
// Quantized signed prediction error Errval after context sign handling.
// Driven from modulo_Errval_latched, i.e. the sign-oriented, modulo-reduced
// value (the sign-restored copy is used only for reconstruction).
output logic signed [31:0] Errval,
// Reconstructed sample Rx used by the line-buffer stage.
output logic [PIX_WIDTH-1:0] reconstructed_sample,
// Forwarded coordinate and strip flags.
output logic [12:0] err_x,
output logic [12:0] err_y,
output logic err_strip_first_pixel,
output logic err_strip_last_pixel,
// Forwarded context and coding metadata.
output logic [8:0] err_context_index,
output logic err_context_negative,
output logic err_run_mode_context,
output logic [4:0] err_qbpp,
output logic [6:0] err_LIMIT,
// Forwarded pre-update context variables for jls_context_update.
output logic [31:0] err_A,
output logic signed [31:0] err_B,
output logic signed [8:0] err_C,
output logic [15:0] err_N
);
// Divider width covers max numerator MAXVAL + NEAR.
localparam int DIV_WIDTH = PIX_WIDTH + 1;
// Right shift applied to dividend * magic to obtain the quotient estimate.
localparam int RECIP_SHIFT = 24;
// Width of the reciprocal constants; the largest table entry (NEAR=1) fits in 23 bits.
localparam int RECIP_MAGIC_WIDTH = 23;
// Full-precision width of dividend * magic.
localparam int RECIP_PRODUCT_WIDTH = DIV_WIDTH + RECIP_MAGIC_WIDTH;
// Width of the quotient-estimate * divisor check product (divisor is 6 bits).
localparam int RECIP_CHECK_WIDTH = DIV_WIDTH + 6;
// State for the exact reciprocal-LUT division pipeline when NEAR > 0.
// Numeric values are not in execution order. The sequence implemented by the
// always_ff block below is:
//   IDLE -> INPUT_PREP -> NUMERATOR_PREP
//        -> (DIV_MUL -> DIV_CHECK -> DIV_CORRECT, only when NEAR_latched != 0)
//        -> ERRVAL_PREP -> ERRVAL -> ERRVAL_SIGN
//        -> RECON_FACTORS -> RECON_MUL -> RECON_SUM -> RECON_CALC -> RECON_CLAMP
//        -> FINISH -> IDLE
typedef enum logic [3:0] {
STATE_IDLE = 4'd0,
STATE_DIV_MUL = 4'd1,
STATE_DIV_CHECK = 4'd2,
STATE_DIV_CORRECT = 4'd3,
STATE_ERRVAL = 4'd4,
STATE_RECON_MUL = 4'd5,
STATE_RECON_SUM = 4'd6,
STATE_RECON_CALC = 4'd7,
STATE_RECON_CLAMP = 4'd8,
STATE_FINISH = 4'd9,
STATE_INPUT_PREP = 4'd10,
STATE_ERRVAL_SIGN = 4'd11,
STATE_NUMERATOR_PREP = 4'd12,
STATE_RECON_FACTORS = 4'd13,
STATE_ERRVAL_PREP = 4'd14
} quant_state_e;
// One-hot state decode keeps per-stage enables shallow. This is important
// when explicit timing-boundary registers below are preserved for 250 MHz.
(* fsm_encoding = "one_hot" *) quant_state_e state;
// Latched event fields.
logic [PIX_WIDTH-1:0] sample_latched;
logic [PIX_WIDTH-1:0] Px_latched;
logic [12:0] x_latched;
logic [12:0] y_latched;
logic strip_first_latched;
logic strip_last_latched;
logic [8:0] context_index_latched;
logic context_negative_latched;
logic run_mode_latched;
logic [31:0] A_latched;
logic signed [31:0] B_latched;
logic signed [8:0] C_latched;
logic [15:0] N_latched;
logic [16:0] RANGE_latched;
logic [4:0] qbpp_latched;
logic [6:0] LIMIT_latched;
logic [5:0] NEAR_latched;
// Sign-oriented X - Px, registered in STATE_INPUT_PREP.
logic signed [32:0] oriented_error_latched;
// Set when the quotient must be negated (oriented error <= 0).
logic quotient_negative_latched;
// Reciprocal-division registers and combinational next values.
logic [DIV_WIDTH-1:0] div_dividend;
logic [DIV_WIDTH-1:0] div_quotient;
logic [5:0] div_denominator;
logic [RECIP_MAGIC_WIDTH-1:0] div_magic;
logic [RECIP_PRODUCT_WIDTH-1:0] div_product;
logic [5:0] divisor_small_next;
logic [RECIP_MAGIC_WIDTH-1:0] reciprocal_magic_next;
logic [RECIP_PRODUCT_WIDTH-1:0] div_dividend_product_ext;
logic [RECIP_PRODUCT_WIDTH-1:0] div_magic_product_ext;
logic [RECIP_PRODUCT_WIDTH-1:0] div_product_next;
logic [DIV_WIDTH-1:0] recip_quotient_est;
logic [RECIP_CHECK_WIDTH-1:0] recip_quotient_est_ext;
logic [RECIP_CHECK_WIDTH-1:0] recip_divisor_ext;
logic [RECIP_CHECK_WIDTH-1:0] recip_check_product;
logic [RECIP_CHECK_WIDTH-1:0] recip_dividend_ext;
logic [DIV_WIDTH-1:0] recip_quotient_est_latched;
logic [RECIP_CHECK_WIDTH-1:0] recip_check_product_latched;
logic [RECIP_CHECK_WIDTH-1:0] recip_dividend_ext_latched;
logic [DIV_WIDTH-1:0] recip_quotient_corrected;
// Input arithmetic.
logic signed [32:0] sample_ext;
logic signed [32:0] Px_ext;
logic signed [32:0] sample_minus_px;
logic signed [32:0] oriented_error_next;
logic signed [32:0] neg_oriented_error_next;
logic quotient_negative_next;
logic [DIV_WIDTH-1:0] division_numerator_next;
logic signed [32:0] division_numerator_positive;
logic signed [32:0] division_numerator_negative;
// Result arithmetic.
logic signed [32:0] quotient_signed;
logic signed [32:0] raw_Errval;
logic signed [32:0] range_ext;
logic signed [32:0] range_midpoint_ext;
logic signed [32:0] modulo_Errval_after_add;
logic signed [32:0] modulo_Errval_after_add_latched;
logic signed [32:0] modulo_Errval;
logic signed [32:0] sign_restored_Errval;
logic signed [32:0] modulo_Errval_latched;
// Timing boundary between Annex A.5 modulo/sign restoration and
// reconstructed-sample dequantization. This register remains as a visible
// pipeline stage for the odd-scale carry-chain multiplier used below.
logic signed [32:0] sign_restored_Errval_latched;
logic signed [32:0] sign_restored_mul_latched;
logic signed [6:0] near_scale_latched;
logic signed [40:0] dequantized_error;
logic signed [40:0] dequantized_error_latched;
logic signed [40:0] reconstruction_base;
logic signed [40:0] reconstruction_base_latched;
logic signed [40:0] reconstruction_sum;
logic signed [40:0] reconstruction_sum_latched;
logic signed [40:0] range_scaled;
logic signed [40:0] range_scaled_latched;
logic signed [40:0] reconstruction_fixed;
logic signed [40:0] reconstruction_fixed_latched;
logic signed [40:0] maxval_ext;
logic signed [40:0] maxval_ext_latched;
logic signed [40:0] near_ext;
logic signed [40:0] near_ext_latched;
logic signed [40:0] maxval_plus_near_latched;
logic signed [40:0] negative_near_latched;
logic [PIX_WIDTH-1:0] reconstructed_next;
logic [PIX_WIDTH-1:0] reconstructed_calc_latched;
// Handshake and acceptance terms.
logic output_slot_open;
logic accept_corrected;
// Shared odd-scale multipliers for Annex A.5 reconstruction terms.
// jls_near_scale_mul is defined outside this module; presumably
// product_o = multiplicand_i * near_scale_i — confirm against its source.
// Only near_scale_latched[5:0] = {NEAR_latched[4:0], 1'b1} is passed, so
// NEAR_latched[5] does not reach either multiplier.
jls_near_scale_mul #(
.INPUT_WIDTH(33),
.OUTPUT_WIDTH(41)
) regular_recon_err_mul_i (
.multiplicand_i(sign_restored_mul_latched),
.near_scale_i(near_scale_latched[5:0]),
.product_o(dequantized_error)
);
jls_near_scale_mul #(
.INPUT_WIDTH(18),
.OUTPUT_WIDTH(41)
) regular_recon_range_mul_i (
.multiplicand_i($signed({1'b0, RANGE_latched})),
.near_scale_i(near_scale_latched[5:0]),
.product_o(range_scaled)
);
// The output register can be (re)loaded when it is empty or being consumed
// this cycle.
always_comb begin
output_slot_open = 1'b0;
if (!err_valid || err_ready) begin
output_slot_open = 1'b1;
end
end
always_comb begin
// Timing note: input acceptance is decoupled from err_ready. This state
// machine has several cycles before STATE_FINISH, so a pending err_valid
// can drain while the next pixel is being processed. If it has not drained
// by STATE_FINISH, the output_slot_open check below holds the result.
corrected_ready = 1'b0;
if (state == STATE_IDLE) begin
corrected_ready = 1'b1;
end
end
// Input handshake fires.
always_comb begin
accept_corrected = 1'b0;
if (corrected_valid && corrected_ready) begin
accept_corrected = 1'b1;
end
end
// X - Px on the latched operands, zero-extended into 33-bit signed values.
always_comb begin
sample_ext = $signed({17'd0, sample_latched});
Px_ext = $signed({17'd0, Px_latched});
sample_minus_px = sample_ext - Px_ext;
end
// Context sign orientation: negate the difference for a negative context.
always_comb begin
oriented_error_next = sample_minus_px;
if (context_negative_latched) begin
oriented_error_next = -sample_minus_px;
end
end
always_comb begin
neg_oriented_error_next = -oriented_error_latched;
end
// Division numerator |oriented error| + NEAR and the sign to re-apply to the
// quotient. A zero or negative oriented error takes the "negative" path
// (numerator = NEAR - error, quotient negated afterwards).
always_comb begin
quotient_negative_next = 1'b1;
division_numerator_positive = oriented_error_latched + $signed({27'd0, NEAR_latched});
division_numerator_negative = neg_oriented_error_next + $signed({27'd0, NEAR_latched});
division_numerator_next = division_numerator_negative[DIV_WIDTH-1:0];
if (oriented_error_latched > 33'sd0) begin
quotient_negative_next = 1'b0;
division_numerator_next = division_numerator_positive[DIV_WIDTH-1:0];
end
end
// Divisor 2*NEAR+1, built from the low five NEAR bits only.
always_comb begin
divisor_small_next = {NEAR_latched[4:0], 1'b1};
end
always_comb begin
// ceil(2^24 / (2*NEAR+1)) for NEAR=1..31. The next pipeline stage
// corrects the possible +1 quotient overshoot by checking q*d > n.
// NEAR[4:0] == 0 selects the default entry 0; that entry is only
// registered when NEAR_latched != 0, i.e. when NEAR_latched[5] is set.
reciprocal_magic_next = 23'd0;
case (NEAR_latched[4:0])
5'd1: begin
reciprocal_magic_next = 23'd5592406;
end
5'd2: begin
reciprocal_magic_next = 23'd3355444;
end
5'd3: begin
reciprocal_magic_next = 23'd2396746;
end
5'd4: begin
reciprocal_magic_next = 23'd1864136;
end
5'd5: begin
reciprocal_magic_next = 23'd1525202;
end
5'd6: begin
reciprocal_magic_next = 23'd1290556;
end
5'd7: begin
reciprocal_magic_next = 23'd1118482;
end
5'd8: begin
reciprocal_magic_next = 23'd986896;
end
5'd9: begin
reciprocal_magic_next = 23'd883012;
end
5'd10: begin
reciprocal_magic_next = 23'd798916;
end
5'd11: begin
reciprocal_magic_next = 23'd729445;
end
5'd12: begin
reciprocal_magic_next = 23'd671089;
end
5'd13: begin
reciprocal_magic_next = 23'd621379;
end
5'd14: begin
reciprocal_magic_next = 23'd578525;
end
5'd15: begin
reciprocal_magic_next = 23'd541201;
end
5'd16: begin
reciprocal_magic_next = 23'd508401;
end
5'd17: begin
reciprocal_magic_next = 23'd479350;
end
5'd18: begin
reciprocal_magic_next = 23'd453439;
end
5'd19: begin
reciprocal_magic_next = 23'd430186;
end
5'd20: begin
reciprocal_magic_next = 23'd409201;
end
5'd21: begin
reciprocal_magic_next = 23'd390168;
end
5'd22: begin
reciprocal_magic_next = 23'd372828;
end
5'd23: begin
reciprocal_magic_next = 23'd356963;
end
5'd24: begin
reciprocal_magic_next = 23'd342393;
end
5'd25: begin
reciprocal_magic_next = 23'd328966;
end
5'd26: begin
reciprocal_magic_next = 23'd316552;
end
5'd27: begin
reciprocal_magic_next = 23'd305041;
end
5'd28: begin
reciprocal_magic_next = 23'd294338;
end
5'd29: begin
reciprocal_magic_next = 23'd284360;
end
5'd30: begin
reciprocal_magic_next = 23'd275037;
end
5'd31: begin
reciprocal_magic_next = 23'd266306;
end
default: begin
reciprocal_magic_next = 23'd0;
end
endcase
end
// Zero-extend both multiplier operands to the full product width.
always_comb begin
div_dividend_product_ext = {{RECIP_MAGIC_WIDTH{1'b0}}, div_dividend};
div_magic_product_ext = {{DIV_WIDTH{1'b0}}, div_magic};
end
// dividend * reciprocal constant; registered in STATE_DIV_MUL.
always_comb begin
div_product_next = div_dividend_product_ext * div_magic_product_ext;
end
// Quotient estimate: registered product shifted right by RECIP_SHIFT.
always_comb begin
recip_quotient_est = div_product >> RECIP_SHIFT;
end
// Operands for the q*d > n overshoot check, widened to RECIP_CHECK_WIDTH.
always_comb begin
recip_quotient_est_ext = {{6{1'b0}}, recip_quotient_est};
recip_divisor_ext = {{DIV_WIDTH{1'b0}}, div_denominator};
recip_dividend_ext = {{6{1'b0}}, div_dividend};
end
always_comb begin
recip_check_product = recip_quotient_est_ext * recip_divisor_ext;
end
// Subtract one from the estimate when estimate * divisor exceeds the dividend.
always_comb begin
recip_quotient_corrected = recip_quotient_est_latched;
if (recip_check_product_latched > recip_dividend_ext_latched) begin
recip_quotient_corrected = recip_quotient_est_latched - {{(DIV_WIDTH-1){1'b0}}, 1'b1};
end
end
// Re-apply the sign recorded in STATE_NUMERATOR_PREP to the unsigned quotient.
always_comb begin
quotient_signed = $signed({16'd0, div_quotient});
if (quotient_negative_latched) begin
quotient_signed = -$signed({16'd0, div_quotient});
end
end
always_comb begin
raw_Errval = quotient_signed;
end
// RANGE and the midpoint (RANGE + 1) >> 1 as 33-bit signed values.
always_comb begin
range_ext = $signed({16'd0, RANGE_latched});
range_midpoint_ext = $signed({16'd0, ((RANGE_latched + 17'd1) >> 1)});
end
// Modulo reduction, step 1: add RANGE to a negative Errval.
always_comb begin
modulo_Errval_after_add = raw_Errval;
if (raw_Errval < 33'sd0) begin
modulo_Errval_after_add = raw_Errval + range_ext;
end
end
// Modulo reduction, step 2: subtract RANGE when at or above the midpoint.
always_comb begin
modulo_Errval = modulo_Errval_after_add_latched;
if (modulo_Errval_after_add_latched >= range_midpoint_ext) begin
modulo_Errval = modulo_Errval_after_add_latched - range_ext;
end
end
// Undo the context sign orientation for the reconstruction path only.
always_comb begin
sign_restored_Errval = modulo_Errval_latched;
if (context_negative_latched) begin
sign_restored_Errval = -modulo_Errval_latched;
end
end
always_comb begin
// Annex A.5 reconstruction base Px. The odd-scale multiplier products are
// computed by the shared helpers above and registered in STATE_RECON_MUL.
reconstruction_base = $signed({25'd0, Px_latched});
end
// Px + dequantized error, before range wrapping.
always_comb begin
reconstruction_sum = reconstruction_base_latched + dequantized_error_latched;
end
// Clamp/wrap bounds. maxval_ext is 2^PIX_WIDTH - 1, derived from the
// compile-time precision rather than from a runtime MAXVAL input.
always_comb begin
maxval_ext = (41'sd1 <<< PIX_WIDTH) - 41'sd1;
near_ext = $signed({35'd0, NEAR_latched});
end
// Wrap the sum back by RANGE*(2*NEAR+1) when it leaves [-NEAR, MAXVAL+NEAR].
always_comb begin
reconstruction_fixed = reconstruction_sum_latched;
if (reconstruction_sum_latched < negative_near_latched) begin
reconstruction_fixed = reconstruction_sum_latched + range_scaled_latched;
end else if (reconstruction_sum_latched > maxval_plus_near_latched) begin
reconstruction_fixed = reconstruction_sum_latched - range_scaled_latched;
end
end
// Final clamp of the wrapped value to [0, 2^PIX_WIDTH - 1].
always_comb begin
reconstructed_next = reconstruction_fixed_latched[PIX_WIDTH-1:0];
if (reconstruction_fixed_latched < 41'sd0) begin
reconstructed_next = {PIX_WIDTH{1'b0}};
end else if (reconstruction_fixed_latched > maxval_ext_latched) begin
reconstructed_next = {PIX_WIDTH{1'b1}};
end
end
always_ff @(posedge clk) begin
if (rst) begin
state <= STATE_IDLE;
sample_latched <= {PIX_WIDTH{1'b0}};
Px_latched <= {PIX_WIDTH{1'b0}};
x_latched <= 13'd0;
y_latched <= 13'd0;
strip_first_latched <= 1'b0;
strip_last_latched <= 1'b0;
context_index_latched <= 9'd0;
context_negative_latched <= 1'b0;
run_mode_latched <= 1'b0;
A_latched <= 32'd0;
B_latched <= 32'sd0;
C_latched <= 9'sd0;
N_latched <= 16'd0;
RANGE_latched <= 17'd0;
qbpp_latched <= 5'd0;
LIMIT_latched <= 7'd0;
NEAR_latched <= 6'd0;
oriented_error_latched <= 33'sd0;
quotient_negative_latched <= 1'b0;
div_dividend <= {DIV_WIDTH{1'b0}};
div_quotient <= {DIV_WIDTH{1'b0}};
div_denominator <= 6'd0;
div_magic <= {RECIP_MAGIC_WIDTH{1'b0}};
div_product <= {RECIP_PRODUCT_WIDTH{1'b0}};
recip_quotient_est_latched <= {DIV_WIDTH{1'b0}};
recip_check_product_latched <= {RECIP_CHECK_WIDTH{1'b0}};
recip_dividend_ext_latched <= {RECIP_CHECK_WIDTH{1'b0}};
modulo_Errval_after_add_latched <= 33'sd0;
modulo_Errval_latched <= 33'sd0;
sign_restored_Errval_latched <= 33'sd0;
sign_restored_mul_latched <= 33'sd0;
near_scale_latched <= 7'sd1;
dequantized_error_latched <= 41'sd0;
reconstruction_base_latched <= 41'sd0;
reconstruction_sum_latched <= 41'sd0;
range_scaled_latched <= 41'sd0;
reconstruction_fixed_latched <= 41'sd0;
maxval_ext_latched <= 41'sd0;
near_ext_latched <= 41'sd0;
maxval_plus_near_latched <= 41'sd0;
negative_near_latched <= 41'sd0;
reconstructed_calc_latched <= {PIX_WIDTH{1'b0}};
err_valid <= 1'b0;
Errval <= 32'sd0;
reconstructed_sample <= {PIX_WIDTH{1'b0}};
err_x <= 13'd0;
err_y <= 13'd0;
err_strip_first_pixel <= 1'b0;
err_strip_last_pixel <= 1'b0;
err_context_index <= 9'd0;
err_context_negative <= 1'b0;
err_run_mode_context <= 1'b0;
err_qbpp <= 5'd0;
err_LIMIT <= 7'd0;
err_A <= 32'd0;
err_B <= 32'sd0;
err_C <= 9'sd0;
err_N <= 16'd0;
end else begin
// Default: drop err_valid once the consumer takes the event. STATE_FINISH
// below may set it again in the same cycle; the later assignment wins.
if (err_valid && err_ready) begin
err_valid <= 1'b0;
end
case (state)
STATE_IDLE: begin
// Capture the event and the coding parameters, then start the pipeline.
if (accept_corrected) begin
sample_latched <= corrected_sample;
Px_latched <= corrected_Px;
x_latched <= corrected_x;
y_latched <= corrected_y;
strip_first_latched <= corrected_strip_first_pixel;
strip_last_latched <= corrected_strip_last_pixel;
context_index_latched <= corrected_context_index;
context_negative_latched <= corrected_context_negative;
run_mode_latched <= corrected_run_mode_context;
A_latched <= corrected_A;
B_latched <= corrected_B;
C_latched <= corrected_C;
N_latched <= corrected_N;
RANGE_latched <= RANGE;
qbpp_latched <= qbpp;
LIMIT_latched <= LIMIT;
NEAR_latched <= NEAR;
div_quotient <= {DIV_WIDTH{1'b0}};
div_product <= {RECIP_PRODUCT_WIDTH{1'b0}};
state <= STATE_INPUT_PREP;
end
end
STATE_INPUT_PREP: begin
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.5 Errval quantization
// Figure : N/A
// Table : N/A
// Pseudocode : Errval = Ix - Px, sign orientation
// Stage note : Corrected input fields were captured in STATE_IDLE.
// This stage registers only the sign-oriented difference; the
// |Errval| + NEAR numerator is formed from it and registered in
// STATE_NUMERATOR_PREP, which breaks the sample/Px to div_product path.
oriented_error_latched <= oriented_error_next;
state <= STATE_NUMERATOR_PREP;
end
STATE_NUMERATOR_PREP: begin
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.5 Errval quantization
// Figure : N/A
// Table : N/A
// Pseudocode : compute quotient sign and numerator for NEAR division
// Stage note : Uses oriented_error_latched from STATE_INPUT_PREP so
// the Ix-Px subtract/sign mux is separated from |Errval| + NEAR and
// the DSP B-input register used by the reciprocal multiply.
// With NEAR == 0 the divisor is 1, so the numerator is the quotient and
// the three division states are skipped.
quotient_negative_latched <= quotient_negative_next;
if (NEAR_latched == 6'd0) begin
div_quotient <= division_numerator_next;
state <= STATE_ERRVAL_PREP;
end else begin
div_dividend <= division_numerator_next;
div_denominator <= divisor_small_next;
div_magic <= reciprocal_magic_next;
state <= STATE_DIV_MUL;
end
end
STATE_DIV_MUL: begin
// Register dividend * reciprocal constant.
div_product <= div_product_next;
state <= STATE_DIV_CHECK;
end
STATE_DIV_CHECK: begin
// Stage note : Register q*d and dividend before the final quotient
// correction. This keeps the DSP product output out of the carry
// chain that subtracts one from the reciprocal quotient estimate.
recip_quotient_est_latched <= recip_quotient_est;
recip_check_product_latched <= recip_check_product;
recip_dividend_ext_latched <= recip_dividend_ext;
state <= STATE_DIV_CORRECT;
end
STATE_DIV_CORRECT: begin
// Register the exact quotient after the overshoot correction.
div_quotient <= recip_quotient_corrected;
state <= STATE_ERRVAL_PREP;
end
STATE_ERRVAL_PREP: begin
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.5 Errval quantization and modulo reduction
// Figure : N/A
// Table : N/A
// Pseudocode : Errval modulo normalization, first wrap step
// Stage note : Capture Errval+RANGE before the midpoint compare so
// div_quotient no longer feeds both carry chains in one cycle.
modulo_Errval_after_add_latched <= modulo_Errval_after_add;
state <= STATE_ERRVAL;
end
STATE_ERRVAL: begin
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.5 Errval quantization and modulo reduction
// Figure : N/A
// Table : N/A
// Pseudocode : Errval modulo normalization, midpoint wrap step
// Stage note : STATE_ERRVAL_PREP already registered the first modulo
// add; this state now contains only the midpoint compare/subtract.
modulo_Errval_latched <= modulo_Errval;
state <= STATE_ERRVAL_SIGN;
end
STATE_ERRVAL_SIGN: begin
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.5 Errval quantization and modulo reduction
// Figure : N/A
// Table : N/A
// Pseudocode : restore Errval sign after modulo normalization
// Stage note : modulo_Errval_latched separates the divider/modulo
// carry chain from the context sign mux and reconstruction DSP input.
sign_restored_Errval_latched <= sign_restored_Errval;
state <= STATE_RECON_FACTORS;
end
STATE_RECON_FACTORS: begin
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.5 reconstructed sample Rx
// Figure : N/A
// Table : N/A
// Pseudocode : prepare Errval and (2*NEAR+1) for dequantization
// Stage note : Explicit operand registers give the reconstruction
// odd-scale multiplier a clean input boundary before Errval*(2*NEAR+1).
sign_restored_mul_latched <= sign_restored_Errval_latched;
near_scale_latched <= $signed({NEAR_latched, 1'b1});
state <= STATE_RECON_MUL;
end
STATE_RECON_MUL: begin
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.5 reconstructed sample Rx
// Figure : N/A
// Table : N/A
// Pseudocode : Errval * (2*NEAR+1) and RANGE * (2*NEAR+1)
// Stage note : Register products before wrap/clamp to reduce the
// NEAR-to-Rx combinational depth at the 250 MHz target.
dequantized_error_latched <= dequantized_error;
reconstruction_base_latched <= reconstruction_base;
range_scaled_latched <= range_scaled;
maxval_ext_latched <= maxval_ext;
near_ext_latched <= near_ext;
state <= STATE_RECON_SUM;
end
STATE_RECON_SUM: begin
// Stage note : Register the reconstruction sum and wrap thresholds
// before Annex A.5 range wrapping. This splits maxval/near boundary
// comparison from the add/subtract that forms reconstruction_fixed.
reconstruction_sum_latched <= reconstruction_sum;
maxval_plus_near_latched <= maxval_ext_latched + near_ext_latched;
negative_near_latched <= -near_ext_latched;
state <= STATE_RECON_CALC;
end
STATE_RECON_CALC: begin
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.5 reconstructed sample Rx
// Figure : N/A
// Table : N/A
// Pseudocode : wrap Px + Errval * (2*NEAR+1) into the extended range
// Stage note : Register the JPEG-LS wrap result before the final
// [0, MAXVAL] clamp, splitting the reported maxval-to-Rx path.
reconstruction_fixed_latched <= reconstruction_fixed;
state <= STATE_RECON_CLAMP;
end
STATE_RECON_CLAMP: begin
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.5 reconstructed sample Rx
// Figure : N/A
// Table : N/A
// Pseudocode : Rx = clamp(wrapped reconstruction, 0, MAXVAL)
// Stage note : The final sample clamp is isolated from the preceding
// wrap add/subtract logic for 250 MHz timing closure.
reconstructed_calc_latched <= reconstructed_next;
state <= STATE_FINISH;
end
STATE_FINISH: begin
// Publish the result once the output register is free; otherwise stay
// here (holding every latched value) until the previous event drains.
if (output_slot_open) begin
err_valid <= 1'b1;
Errval <= modulo_Errval_latched[31:0];
reconstructed_sample <= reconstructed_calc_latched;
err_x <= x_latched;
err_y <= y_latched;
err_strip_first_pixel <= strip_first_latched;
err_strip_last_pixel <= strip_last_latched;
err_context_index <= context_index_latched;
err_context_negative <= context_negative_latched;
err_run_mode_context <= run_mode_latched;
err_qbpp <= qbpp_latched;
err_LIMIT <= LIMIT_latched;
err_A <= A_latched;
err_B <= B_latched;
err_C <= C_latched;
err_N <= N_latched;
state <= STATE_IDLE;
end
end
default: begin
// Unused encoding (4'd15): return to the idle state.
state <= STATE_IDLE;
end
endcase
end
end
endmodule
`default_nettype wire

1476
fpga/verilog/jls_run_mode.sv Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,364 @@
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
// Clause : Annex A.8 control procedure, Annex D.1-D.3 scan control
// Figure : N/A
// Table : N/A
// Pseudocode : Start one JPEG-LS scan per standalone strip frame
// Trace : docs/jls_traceability.md#jls-scan-control
// Example : The first pixel of each strip emits strip_start_valid.
//
// Scan controller for the strip-frame architecture. It converts pixel boundary
// flags from jls_input_ctrl into strip start/finish commands and forwards the
// pixel stream to the later predictor/context pipeline. A one-entry registered
// slot breaks the input pixel_valid path away from downstream strip-start and
// context ready/CE controls while still allowing one accepted pixel per cycle
// when the slot drains and refills in the same cycle.
`default_nettype none
module jls_scan_ctrl #(
  // Grayscale sample width in bits, fixed at elaboration. Legal values:
  // 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16,
  // Row count of one standalone JPEG-LS strip frame; drives strip_height.
  parameter int SCAN_ROWS = 16
) (
  // Main 250 MHz clock.
  input var logic clk,
  // Active-high reset, sampled on the rising clock edge.
  input var logic rst,
  // jls_input_ctrl is presenting a pixel event.
  input var logic pixel_valid,
  // High when the presented pixel event can be taken this cycle.
  output logic pixel_ready,
  // Grayscale input sample.
  input var logic [PIX_WIDTH-1:0] pixel_sample,
  // Zero-based column of the pixel in the original image.
  input var logic [12:0] pixel_x,
  // Zero-based row of the pixel in the original image.
  input var logic [12:0] pixel_y,
  // Pixel is the first one of the current strip frame.
  input var logic strip_first_pixel,
  // Pixel is the last one of the current strip frame.
  input var logic strip_last_pixel,
  // Pixel is the first one of the current original image.
  input var logic image_first_pixel,
  // Pixel is the last one of the current original image.
  input var logic image_last_pixel,
  // Effective image width after runtime validation and fallback.
  input var logic [12:0] active_pic_col,
  // Ratio latched for the current original image.
  input var logic [3:0] active_ratio,
  // Dynamic NEAR from jls_near_ctrl; ignored on the image-first pixel.
  input var logic [5:0] current_near,
  // Slot holds a pixel and its strip commands are not blocking it.
  output logic enc_pixel_valid,
  // Predictor/context pipeline can take the forwarded pixel.
  input var logic enc_pixel_ready,
  // Forwarded grayscale sample (slot contents).
  output logic [PIX_WIDTH-1:0] enc_pixel_sample,
  // Forwarded column coordinate (slot contents).
  output logic [12:0] enc_pixel_x,
  // Forwarded row coordinate (slot contents).
  output logic [12:0] enc_pixel_y,
  // Forwarded end-of-row flag, captured together with enc_pixel_x/y. Not the
  // same as enc_strip_last_pixel, which marks the end of the whole strip.
  output logic enc_row_last_pixel,
  // Forwarded strip-first flag.
  output logic enc_strip_first_pixel,
  // Forwarded strip-last flag.
  output logic enc_strip_last_pixel,
  // Strip start command for jls_header_writer.
  output logic strip_start_valid,
  // Header writer can take a strip start command.
  input var logic strip_start_ready,
  // Slot pixel carried the image-first flag.
  output logic original_image_first_strip,
  // Strip frame width written to SOF55.X.
  output logic [12:0] strip_width,
  // Strip frame height written to SOF55.Y.
  output logic [12:0] strip_height,
  // NEAR value captured with the slot pixel.
  output logic [5:0] strip_near,
  // Strip finish command, raised while the strip-last pixel leaves the slot.
  output logic strip_finish_valid,
  // Finish handler can take a strip finish command.
  input var logic strip_finish_ready,
  // Slot pixel carried the image-last flag.
  output logic original_image_last_strip,
  // Pixel total of the strip; non-zero only while the slot flags strip-last.
  output logic [31:0] strip_pixel_count,
  // Pulses when the image-first pixel is accepted; consumed by jls_near_ctrl.
  output logic near_image_start_valid,
  // Ratio handed to jls_near_ctrl (direct copy of active_ratio).
  output logic [3:0] near_image_ratio
);

  // SCAN_ROWS narrowed to the 13-bit strip_height port.
  localparam logic [12:0] SCAN_ROWS_VALUE = SCAN_ROWS;

  // Pixels already forwarded in the current strip, and that value plus one.
  logic [31:0] strip_pixel_count_running;
  logic [31:0] count_plus_one;

  // Registered one-entry slot between the input controller and the encoder.
  logic                 slot_valid;
  logic [PIX_WIDTH-1:0] slot_sample;
  logic [12:0]          slot_x;
  logic [12:0]          slot_y;
  logic                 slot_strip_first_pixel;
  logic                 slot_strip_last_pixel;
  logic                 slot_row_last_pixel;
  logic                 slot_image_first_pixel;
  logic                 slot_image_last_pixel;
  logic [12:0]          slot_active_pic_col;
  logic [5:0]           slot_strip_near;

  // Handshake helper terms.
  logic input_cmd_ok;    // incoming pixel is not blocked by a strip command port
  logic slot_start_ok;   // slot pixel is not waiting on strip_start_ready
  logic slot_finish_ok;  // slot pixel is not waiting on strip_finish_ready
  logic slot_pop;        // slot contents leave this cycle
  logic slot_push;       // a new pixel is captured this cycle

  // Row-end detection and NEAR selection for the incoming pixel.
  logic [12:0] last_col_index;
  logic        at_row_end;
  logic [5:0]  near_to_latch;

  // ---------------------------------------------------------------------
  // Input-side gate: a strip-first (strip-last) pixel is refused while the
  // start (finish) command port is not ready.
  // ---------------------------------------------------------------------
  always_comb begin
    if (strip_first_pixel && !strip_start_ready) begin
      input_cmd_ok = 1'b0;
    end else if (strip_last_pixel && !strip_finish_ready) begin
      input_cmd_ok = 1'b0;
    end else begin
      input_cmd_ok = 1'b1;
    end
  end

  // ---------------------------------------------------------------------
  // Slot-side gates: same rule, evaluated on the flags held in the slot.
  // ---------------------------------------------------------------------
  always_comb begin
    if (slot_strip_first_pixel && !strip_start_ready) begin
      slot_start_ok = 1'b0;
    end else begin
      slot_start_ok = 1'b1;
    end

    if (slot_strip_last_pixel && !strip_finish_ready) begin
      slot_finish_ok = 1'b0;
    end else begin
      slot_finish_ok = 1'b1;
    end
  end

  // The slot drains only when every consumer involved is ready at once.
  always_comb begin
    if (slot_valid && slot_start_ok && slot_finish_ok && enc_pixel_ready) begin
      slot_pop = 1'b1;
    end else begin
      slot_pop = 1'b0;
    end
  end

  // A pixel can be taken when the slot is empty or drains in this same cycle,
  // which sustains one pixel per clock.
  always_comb begin
    if (input_cmd_ok && (!slot_valid || slot_pop)) begin
      pixel_ready = 1'b1;
    end else begin
      pixel_ready = 1'b0;
    end
  end

  always_comb begin
    if (pixel_valid && pixel_ready) begin
      slot_push = 1'b1;
    end else begin
      slot_push = 1'b0;
    end
  end

  // ---------------------------------------------------------------------
  // Downstream valids. Each valid excludes its own ready but includes the
  // readiness of the other two consumers, so all transfers happen together.
  // ---------------------------------------------------------------------
  always_comb begin
    if (slot_valid && slot_start_ok && slot_finish_ok) begin
      enc_pixel_valid = 1'b1;
    end else begin
      enc_pixel_valid = 1'b0;
    end

    if (slot_valid && slot_strip_first_pixel && slot_finish_ok && enc_pixel_ready) begin
      strip_start_valid = 1'b1;
    end else begin
      strip_start_valid = 1'b0;
    end

    if (slot_valid && slot_strip_last_pixel && slot_start_ok && enc_pixel_ready) begin
      strip_finish_valid = 1'b1;
    end else begin
      strip_finish_valid = 1'b0;
    end
  end

  // ---------------------------------------------------------------------
  // Slot contents forwarded unchanged to the encoder and command ports.
  // ---------------------------------------------------------------------
  assign enc_pixel_sample           = slot_sample;
  assign enc_pixel_x                = slot_x;
  assign enc_pixel_y                = slot_y;
  assign enc_row_last_pixel         = slot_row_last_pixel;
  assign enc_strip_first_pixel      = slot_strip_first_pixel;
  assign enc_strip_last_pixel       = slot_strip_last_pixel;
  assign original_image_first_strip = slot_image_first_pixel;
  assign original_image_last_strip  = slot_image_last_pixel;
  assign strip_width                = slot_active_pic_col;
  assign strip_height               = SCAN_ROWS_VALUE;
  assign strip_near                 = slot_strip_near;
  assign near_image_ratio           = active_ratio;

  // ---------------------------------------------------------------------
  // Values derived from the incoming pixel before it is captured.
  // ---------------------------------------------------------------------
  // NEAR is forced to zero on the image-first pixel, independent of what
  // jls_near_ctrl is driving in that cycle.
  always_comb begin
    if (image_first_pixel) begin
      near_to_latch = 6'd0;
    end else begin
      near_to_latch = current_near;
    end
  end

  // Index of the last column; 13-bit arithmetic, so a width of 0 wraps.
  assign last_col_index = active_pic_col - 13'd1;

  always_comb begin
    if (pixel_x == last_col_index) begin
      at_row_end = 1'b1;
    end else begin
      at_row_end = 1'b0;
    end
  end

  always_comb begin
    if (slot_push && image_first_pixel) begin
      near_image_start_valid = 1'b1;
    end else begin
      near_image_start_valid = 1'b0;
    end
  end

  // ---------------------------------------------------------------------
  // Strip pixel counter. The running register excludes the slot pixel, so
  // the reported total on the strip-last pixel is running + 1.
  // ---------------------------------------------------------------------
  assign count_plus_one = strip_pixel_count_running + 32'd1;

  always_comb begin
    if (slot_strip_last_pixel) begin
      strip_pixel_count = count_plus_one;
    end else begin
      strip_pixel_count = 32'd0;
    end
  end

  always_ff @(posedge clk) begin
    if (rst) begin
      strip_pixel_count_running <= 32'd0;
    end else if (slot_pop) begin
      if (slot_strip_last_pixel) begin
        // Strip finished: start the next strip from zero.
        strip_pixel_count_running <= 32'd0;
      end else if (slot_strip_first_pixel) begin
        // First pixel of a strip: restart at one regardless of old value.
        strip_pixel_count_running <= 32'd1;
      end else begin
        strip_pixel_count_running <= count_plus_one;
      end
    end
  end

  // ---------------------------------------------------------------------
  // Slot occupancy: a push wins over a pop, so a same-cycle drain and
  // refill leaves the slot full.
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin
    if (rst) begin
      slot_valid <= 1'b0;
    end else if (slot_push) begin
      slot_valid <= 1'b1;
    end else if (slot_pop) begin
      slot_valid <= 1'b0;
    end
  end

  // ---------------------------------------------------------------------
  // Slot payload: captured on a push, held otherwise (also after a pop).
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin
    if (rst) begin
      slot_sample            <= {PIX_WIDTH{1'b0}};
      slot_x                 <= 13'd0;
      slot_y                 <= 13'd0;
      slot_strip_first_pixel <= 1'b0;
      slot_strip_last_pixel  <= 1'b0;
      slot_row_last_pixel    <= 1'b0;
      slot_image_first_pixel <= 1'b0;
      slot_image_last_pixel  <= 1'b0;
      slot_active_pic_col    <= 13'd0;
      slot_strip_near        <= 6'd0;
    end else if (slot_push) begin
      slot_sample            <= pixel_sample;
      slot_x                 <= pixel_x;
      slot_y                 <= pixel_y;
      slot_strip_first_pixel <= strip_first_pixel;
      slot_strip_last_pixel  <= strip_last_pixel;
      slot_row_last_pixel    <= at_row_end;
      slot_image_first_pixel <= image_first_pixel;
      slot_image_last_pixel  <= image_last_pixel;
      slot_active_pic_col    <= active_pic_col;
      slot_strip_near        <= near_to_latch;
    end
  end
endmodule
`default_nettype wire

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,24 @@
fpga/verilog/jls_common_pkg.sv
fpga/verilog/jls_preset_defaults.sv
fpga/verilog/jls_coding_params.sv
fpga/verilog/jls_input_ctrl.sv
fpga/verilog/jls_scan_ctrl.sv
fpga/verilog/jls_neighbor_provider.sv
fpga/verilog/jls_mode_router.sv
fpga/verilog/jls_predictor.sv
fpga/verilog/jls_context_quantizer.sv
fpga/verilog/jls_context_model.sv
fpga/verilog/jls_prediction_corrector.sv
fpga/verilog/jls_near_scale_mul.sv
fpga/verilog/jls_regular_error_quantizer.sv
fpga/verilog/jls_header_writer.sv
fpga/verilog/jls_near_ctrl.sv
fpga/verilog/jls_context_memory.sv
fpga/verilog/jls_context_update.sv
fpga/verilog/jls_error_mapper.sv
fpga/verilog/jls_run_mode.sv
fpga/verilog/jls_golomb_encoder.sv
fpga/verilog/jls_bit_packer.sv
fpga/verilog/jls_byte_arbiter.sv
fpga/verilog/jls_output_buffer.sv
fpga/verilog/jpeg_ls_encoder_top.sv