Initial JPEG-LS FPGA encoder baseline with tooling and timeout fix
This commit is contained in:
225
fpga/sim/run_jls_pattern_charls_regression.ps1
Normal file
225
fpga/sim/run_jls_pattern_charls_regression.ps1
Normal file
@@ -0,0 +1,225 @@
|
||||
param(
|
||||
[string] $StartAtCase = "",
|
||||
[switch] $Resume,
|
||||
[ValidateSet(1, 2, 4, 8)]
|
||||
[int] $ConfiguredCompressionRatio = 1,
|
||||
[string] $OnlyCase = ""
|
||||
)
|
||||
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
function Invoke-CheckedCommand {
    # Runs $Command, echoes everything it printed, and throws when the command
    # failed. A failure is a non-zero native exit code, a cmdlet error raised
    # inside the script block, or simulator error text in the captured output.
    #
    # Parameters:
    #   Description - human-readable step name used in log and error messages.
    #   Command     - script block that launches the tool to be checked.
    param(
        [string] $Description,
        [scriptblock] $Command
    )

    Write-Host "[jls-pattern-regression] $Description"

    # Start from a known exit code so a block that never launches a native
    # program cannot inherit a stale (or $null) value from an earlier step.
    $global:LASTEXITCODE = 0

    # With "Stop" in effect, Windows PowerShell turns the first stderr line of
    # a native command redirected with 2>&1 into a terminating error, which
    # would abort the step before its exit code and output can be inspected.
    # Relax the preference only while the command runs.
    $CallerPreference = $ErrorActionPreference
    $ErrorActionPreference = "Continue"
    try {
        $CommandOutput = & $Command 2>&1
    }
    finally {
        $ErrorActionPreference = $CallerPreference
    }
    $CommandExitCode = $LASTEXITCODE

    # Flatten stderr records to their message text so the echo and the
    # pattern checks below see plain lines.
    $CommandText = $CommandOutput | ForEach-Object { "$_" } | Out-String
    $CommandOutput | ForEach-Object { Write-Host $_ }

    # Errors that did not come from a native program's stderr are genuine
    # PowerShell failures; keep them fatal as they were under "Stop".
    foreach ($Item in $CommandOutput) {
        if (($Item -is [System.Management.Automation.ErrorRecord]) -and
            ($Item.FullyQualifiedErrorId -notlike "NativeCommandError*")) {
            throw $Item
        }
    }

    if ($CommandExitCode -ne 0) {
        throw ("Command failed with exit code {0}: {1}" -f $CommandExitCode, $Description)
    }
    if (($CommandText -match "\*\* Fatal") -or
        ($CommandText -match "\*\* Error") -or
        ($CommandText -match "Errors[=:]\s*[1-9]")) {
        throw "Command reported simulator errors: $Description"
    }
}
|
||||
|
||||
function To-PosixPath {
    # Returns $Path with every backslash replaced by a forward slash.
    param([string] $Path)

    $ForwardSlashed = $Path.Replace("\", "/")
    return $ForwardSlashed
}
|
||||
|
||||
function Get-RtlRatioPortValue {
    # Maps a configured compression ratio (1, 2, 4 or 8) to the value 0..3
    # returned to the caller. Any other ratio is rejected with an exception.
    param([int] $ConfiguredCompressionRatio)

    $PortValueByRatio = @{
        1 = 0
        2 = 1
        4 = 2
        8 = 3
    }

    if (-not $PortValueByRatio.ContainsKey($ConfiguredCompressionRatio)) {
        throw "Unsupported configured compression ratio: $ConfiguredCompressionRatio"
    }

    return $PortValueByRatio[$ConfiguredCompressionRatio]
}
|
||||
|
||||
$RtlRatioPort = Get-RtlRatioPortValue -ConfiguredCompressionRatio $ConfiguredCompressionRatio
|
||||
$RepoRoot = (Resolve-Path ".").Path
|
||||
$OutDir = Join-Path $RepoRoot ("tools\jls_compat\out\pattern_regression_cr" + $ConfiguredCompressionRatio)
|
||||
$DepPath = Join-Path $RepoRoot "tools\jls_compat\.deps"
|
||||
$SummaryCsv = Join-Path $OutDir "pattern_regression_summary.csv"
|
||||
$QuestaBin = "C:\questasim64_2020.1\win64"
|
||||
$VlogExe = Join-Path $QuestaBin "vlog.exe"
|
||||
$VsimExe = Join-Path $QuestaBin "vsim.exe"
|
||||
|
||||
if (!(Test-Path -Path $VlogExe)) {
|
||||
throw "Questa vlog.exe not found at $VlogExe"
|
||||
}
|
||||
if (!(Test-Path -Path $VsimExe)) {
|
||||
throw "Questa vsim.exe not found at $VsimExe"
|
||||
}
|
||||
|
||||
New-Item -ItemType Directory -Force -Path $OutDir | Out-Null
|
||||
if (!$Resume) {
|
||||
Remove-Item -Path (Join-Path $OutDir "*.jls") -ErrorAction SilentlyContinue
|
||||
Remove-Item -Path (Join-Path $OutDir "*.rtljls") -ErrorAction SilentlyContinue
|
||||
Remove-Item -Path (Join-Path $OutDir "*.log") -ErrorAction SilentlyContinue
|
||||
Remove-Item -Path (Join-Path $OutDir "*.json") -ErrorAction SilentlyContinue
|
||||
}
|
||||
|
||||
if ((!$Resume) -or !(Test-Path -Path $SummaryCsv)) {
|
||||
"case_name,configured_compression_ratio,rtl_ratio_port,source_pgm,reference_jls,rtl_jls,width,height,max_value,expected_frames,status,detail" |
|
||||
Set-Content -Path $SummaryCsv -Encoding ascii
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile RTL top smoke testbench for file-driven pattern regression" {
|
||||
& $VlogExe -sv -f fpga/verilog/jpeg_ls_rtl.f fpga/sim/tb_jpeg_ls_encoder_top_run_smoke.sv
|
||||
}
|
||||
|
||||
$env:JLS_COMPAT_PYDEPS = $DepPath
|
||||
$StartRunning = [string]::IsNullOrEmpty($StartAtCase)
|
||||
|
||||
$PatternFiles = Get-ChildItem -Path (Join-Path $RepoRoot "img\patterns") -Filter "*.pgm" |
|
||||
Sort-Object -Property Name
|
||||
|
||||
foreach ($PatternFile in $PatternFiles) {
|
||||
$SourcePgm = $PatternFile.FullName
|
||||
$PatternLeaf = $PatternFile.Name
|
||||
$CaseName = [System.IO.Path]::GetFileNameWithoutExtension($SourcePgm)
|
||||
|
||||
if (![string]::IsNullOrEmpty($OnlyCase) -and ($CaseName -ne $OnlyCase)) {
|
||||
continue
|
||||
}
|
||||
|
||||
$ReferenceJls = Join-Path $RepoRoot (
|
||||
"img\reference\charls\" + $CaseName + "-r" + $RtlRatioPort + ".charlsjls"
|
||||
)
|
||||
|
||||
if (!$StartRunning) {
|
||||
if ($CaseName -eq $StartAtCase) {
|
||||
$StartRunning = $true
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
$PgmFd = [System.IO.File]::OpenRead($SourcePgm)
|
||||
$Reader = New-Object System.IO.BinaryReader($PgmFd)
|
||||
try {
|
||||
$Magic = [System.Text.Encoding]::ASCII.GetString($Reader.ReadBytes(2))
|
||||
if ($Magic -ne "P5") {
|
||||
throw "Input file is not P5 PGM: $SourcePgm"
|
||||
}
|
||||
|
||||
function Read-PgmToken {
    # Reads the next header token from $TokenReader and returns it as an ASCII
    # string. Leading whitespace (space, tab, LF, CR) is skipped, and a '#'
    # found while skipping discards everything up to the next LF. The token
    # ends at end of stream or at the first '#'/whitespace byte, which is
    # left unread for the next call.
    #
    # Lookahead is done on raw bytes of the underlying stream (which must be
    # seekable) instead of BinaryReader.PeekChar(): PeekChar goes through the
    # reader's character decoder, which is not meaningful for a binary file.
    param([System.IO.BinaryReader] $TokenReader)

    $Stream = $TokenReader.BaseStream
    # '#', space, tab, LF, CR
    $Delimiters = [byte[]] @(35, 32, 9, 10, 13)

    while ($true) {
        $Byte = $TokenReader.ReadByte()
        if ($Byte -eq 35) {
            # Comment: drop the rest of the line.
            while ($TokenReader.ReadByte() -ne 10) {
            }
            continue
        }
        if ($Delimiters -contains $Byte) {
            continue
        }

        $Bytes = New-Object System.Collections.Generic.List[byte]
        $Bytes.Add([byte] $Byte)
        while ($true) {
            # Stream.ReadByte returns -1 at end of stream instead of throwing.
            $Next = $Stream.ReadByte()
            if ($Next -lt 0) {
                break
            }
            if ($Delimiters -contains $Next) {
                # Un-read the delimiter so the caller sees it again.
                $Stream.Position = $Stream.Position - 1
                break
            }
            $Bytes.Add([byte] $Next)
        }
        return [System.Text.Encoding]::ASCII.GetString($Bytes.ToArray())
    }
}
|
||||
|
||||
$Width = [int](Read-PgmToken $Reader)
|
||||
$Height = [int](Read-PgmToken $Reader)
|
||||
$MaxValue = [int](Read-PgmToken $Reader)
|
||||
}
|
||||
finally {
|
||||
$Reader.Close()
|
||||
$PgmFd.Close()
|
||||
}
|
||||
|
||||
$RtlJls = Join-Path $OutDir ($CaseName + ".rtljls")
|
||||
$TranscriptLog = Join-Path $OutDir ($CaseName + ".vsim.log")
|
||||
$CaseJson = Join-Path $OutDir ($CaseName + ".summary.json")
|
||||
# One frame is expected per 16-row strip. A plain [int] cast of the quotient
# rounds half to even (24 rows -> 2 frames but 40 rows -> 2 frames), so round
# explicitly instead.
# NOTE(review): assumes a trailing partial strip is emitted as its own frame;
# confirm against the testbench. Heights that are multiples of 16 are unaffected.
$ExpectedFrames = [int][Math]::Ceiling($Height / 16)
|
||||
|
||||
$VsimArgs = @(
|
||||
"-c",
|
||||
"tb_jpeg_ls_encoder_top_run_smoke",
|
||||
"-gPIX_WIDTH=16",
|
||||
("-gPIC_COL=" + $Width),
|
||||
("-gPIC_ROW=" + $Height),
|
||||
("+IN_PGM=" + (To-PosixPath $SourcePgm)),
|
||||
("+RATIO=" + $RtlRatioPort),
|
||||
("+OUT=" + (To-PosixPath $RtlJls)),
|
||||
("+CASE=" + $CaseName),
|
||||
"-do",
|
||||
"run -all; quit"
|
||||
)
|
||||
|
||||
try {
|
||||
Invoke-CheckedCommand "run Questa RTL regression for $CaseName" {
|
||||
& $VsimExe @VsimArgs 2>&1 | Tee-Object -FilePath $TranscriptLog
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "decode and compare RTL output against CharLS reference for $CaseName" {
|
||||
python tools/jls_compat/validate_dynamic_near_stream.py `
|
||||
$RtlJls `
|
||||
--reference-pgm $SourcePgm `
|
||||
--configured-compression-ratio $ConfiguredCompressionRatio `
|
||||
--expected-frames $ExpectedFrames `
|
||||
--output-reference-jls $ReferenceJls `
|
||||
--summary-json $CaseJson
|
||||
}
|
||||
|
||||
"{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},PASS,OK" -f `
|
||||
$CaseName, `
|
||||
$ConfiguredCompressionRatio, `
|
||||
$RtlRatioPort, `
|
||||
(To-PosixPath ("img/patterns/" + $PatternLeaf)), `
|
||||
(To-PosixPath ((Resolve-Path -LiteralPath $ReferenceJls).Path)), `
|
||||
(To-PosixPath $RtlJls), `
|
||||
$Width, `
|
||||
$Height, `
|
||||
$MaxValue, `
|
||||
$ExpectedFrames | Add-Content -Path $SummaryCsv -Encoding ascii
|
||||
}
|
||||
catch {
|
||||
$Detail = $_.Exception.Message.Replace(",", ";").Replace("`r", " ").Replace("`n", " ")
|
||||
Write-Warning "[jls-pattern-regression] FAIL $CaseName : $Detail"
|
||||
"{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},FAIL,{10}" -f `
|
||||
$CaseName, `
|
||||
$ConfiguredCompressionRatio, `
|
||||
$RtlRatioPort, `
|
||||
(To-PosixPath ("img/patterns/" + $PatternLeaf)), `
|
||||
(To-PosixPath ("img/reference/charls/" + $CaseName + "-r" + $RtlRatioPort + ".charlsjls")), `
|
||||
(To-PosixPath $RtlJls), `
|
||||
$Width, `
|
||||
$Height, `
|
||||
$MaxValue, `
|
||||
$ExpectedFrames, `
|
||||
$Detail | Add-Content -Path $SummaryCsv -Encoding ascii
|
||||
}
|
||||
}
|
||||
|
||||
Write-Host "[jls-pattern-regression] DONE"
|
||||
Write-Host "[jls-pattern-regression] Summary: $SummaryCsv"
|
||||
231
fpga/sim/run_jls_smoke.ps1
Normal file
231
fpga/sim/run_jls_smoke.ps1
Normal file
@@ -0,0 +1,231 @@
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
function Invoke-CheckedCommand {
    # Runs $Command and echoes everything it printed. Exits the script with
    # the command's exit code when that code is non-zero, and raises an error
    # when a cmdlet inside the block failed or the captured output contains
    # simulator error text.
    #
    # Parameters:
    #   Description - human-readable step name used in log and error messages.
    #   Command     - script block that launches the tool to be checked.
    param(
        [string] $Description,
        [scriptblock] $Command
    )

    Write-Host "[jls-smoke] $Description"

    # Start from a known exit code so a block that never launches a native
    # program cannot inherit a stale (or $null) value from an earlier step.
    $global:LASTEXITCODE = 0

    # With "Stop" in effect, Windows PowerShell turns the first stderr line of
    # a native command redirected with 2>&1 into a terminating error, which
    # would abort the step before its exit code and output can be inspected.
    # Relax the preference only while the command runs.
    $CallerPreference = $ErrorActionPreference
    $ErrorActionPreference = "Continue"
    try {
        $CommandOutput = & $Command 2>&1
    }
    finally {
        $ErrorActionPreference = $CallerPreference
    }
    $CommandExitCode = $LASTEXITCODE

    # Flatten stderr records to their message text so the echo and the
    # pattern checks below see plain lines.
    $CommandText = $CommandOutput | ForEach-Object { "$_" } | Out-String
    $CommandOutput | ForEach-Object { Write-Host $_ }

    # Errors that did not come from a native program's stderr are genuine
    # PowerShell failures; keep them fatal as they were under "Stop".
    foreach ($Item in $CommandOutput) {
        if (($Item -is [System.Management.Automation.ErrorRecord]) -and
            ($Item.FullyQualifiedErrorId -notlike "NativeCommandError*")) {
            throw $Item
        }
    }

    if ($CommandExitCode -ne 0) {
        exit $CommandExitCode
    }
    # "** Error" lines are checked as well as "** Fatal" and the error count,
    # so an error report is caught even when no summary line is printed.
    if (($CommandText -match "\*\* Fatal") -or
        ($CommandText -match "\*\* Error") -or
        ($CommandText -match "Errors[=:]\s*[1-9]")) {
        Write-Error "Command reported simulator errors: $Description"
    }
}
|
||||
|
||||
Invoke-CheckedCommand "compile RTL filelist" {
|
||||
vlog -sv -f fpga/verilog/jpeg_ls_rtl.f
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile top-level idle smoke test" {
|
||||
vlog -sv -f fpga/verilog/jpeg_ls_rtl.f fpga/sim/tb_jpeg_ls_encoder_top_idle.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run top-level idle smoke test" {
|
||||
vsim -c tb_jpeg_ls_encoder_top_idle -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile top-level run-mode smoke test" {
|
||||
vlog -sv -f fpga/verilog/jpeg_ls_rtl.f fpga/sim/tb_jpeg_ls_encoder_top_run_smoke.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run top-level run-mode smoke test" {
|
||||
vsim -c tb_jpeg_ls_encoder_top_run_smoke -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run top-level multi-image smoke test" {
|
||||
vsim -c tb_jpeg_ls_encoder_top_run_smoke -gIMAGE_COUNT=2 +OUT=tools/jls_compat/out/rtl_top_zero_8b_2image.jls -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile preset-defaults smoke test" {
|
||||
vlog -sv fpga/verilog/jls_preset_defaults.sv fpga/sim/tb_jls_preset_defaults.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run preset-defaults smoke test" {
|
||||
vsim -c tb_jls_preset_defaults -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile coding-params smoke test" {
|
||||
vlog -sv fpga/verilog/jls_coding_params.sv fpga/sim/tb_jls_coding_params.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run coding-params smoke test" {
|
||||
vsim -c tb_jls_coding_params -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile input-control smoke test" {
|
||||
vlog -sv fpga/verilog/jls_common_pkg.sv fpga/verilog/jls_input_ctrl.sv fpga/sim/tb_jls_input_ctrl.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run input-control smoke test" {
|
||||
vsim -c tb_jls_input_ctrl -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile scan-control smoke test" {
|
||||
vlog -sv fpga/verilog/jls_scan_ctrl.sv fpga/sim/tb_jls_scan_ctrl.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run scan-control smoke test" {
|
||||
vsim -c tb_jls_scan_ctrl -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile neighbor-provider smoke test" {
|
||||
vlog -sv fpga/verilog/jls_neighbor_provider.sv fpga/sim/tb_jls_neighbor_provider.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run neighbor-provider smoke test" {
|
||||
vsim -c tb_jls_neighbor_provider -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile neighbor-provider lossless-fast smoke test" {
|
||||
vlog -sv fpga/verilog/jls_neighbor_provider.sv fpga/sim/tb_jls_neighbor_provider_lossless_fast.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run neighbor-provider lossless-fast smoke test" {
|
||||
vsim -c tb_jls_neighbor_provider_lossless_fast -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile neighbor-provider near-bypass smoke test" {
|
||||
vlog -sv fpga/verilog/jls_neighbor_provider.sv fpga/sim/tb_jls_neighbor_provider_near_bypass.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run neighbor-provider near-bypass smoke test" {
|
||||
vsim -c tb_jls_neighbor_provider_near_bypass -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile mode-router smoke test" {
|
||||
vlog -sv fpga/verilog/jls_mode_router.sv fpga/sim/tb_jls_mode_router.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run mode-router smoke test" {
|
||||
vsim -c tb_jls_mode_router -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile predictor smoke test" {
|
||||
vlog -sv fpga/verilog/jls_predictor.sv fpga/sim/tb_jls_predictor.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run predictor smoke test" {
|
||||
vsim -c tb_jls_predictor -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile context-quantizer smoke test" {
|
||||
vlog -sv fpga/verilog/jls_context_quantizer.sv fpga/sim/tb_jls_context_quantizer.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run context-quantizer smoke test" {
|
||||
vsim -c tb_jls_context_quantizer -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile context-model smoke test" {
|
||||
vlog -sv fpga/verilog/jls_context_memory.sv fpga/verilog/jls_context_model.sv fpga/sim/tb_jls_context_model.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run context-model smoke test" {
|
||||
vsim -c tb_jls_context_model -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile prediction-corrector smoke test" {
|
||||
vlog -sv fpga/verilog/jls_prediction_corrector.sv fpga/sim/tb_jls_prediction_corrector.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run prediction-corrector smoke test" {
|
||||
vsim -c tb_jls_prediction_corrector -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile regular-error-quantizer smoke test" {
|
||||
vlog -sv fpga/verilog/jls_regular_error_quantizer.sv fpga/sim/tb_jls_regular_error_quantizer.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run regular-error-quantizer smoke test" {
|
||||
vsim -c tb_jls_regular_error_quantizer -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile header-writer smoke test" {
|
||||
vlog -sv fpga/verilog/jls_common_pkg.sv fpga/verilog/jls_header_writer.sv fpga/sim/tb_jls_header_writer.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run header-writer smoke test" {
|
||||
vsim -c tb_jls_header_writer -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile near-control smoke test" {
|
||||
vlog -sv fpga/verilog/jls_near_ctrl.sv fpga/sim/tb_jls_near_ctrl.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run near-control smoke test" {
|
||||
vsim -c tb_jls_near_ctrl -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile context-memory smoke test" {
|
||||
vlog -sv fpga/verilog/jls_context_memory.sv fpga/sim/tb_jls_context_memory.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run context-memory smoke test" {
|
||||
vsim -c tb_jls_context_memory -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile context-update smoke test" {
|
||||
vlog -sv fpga/verilog/jls_context_update.sv fpga/sim/tb_jls_context_update.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run context-update smoke test" {
|
||||
vsim -c tb_jls_context_update -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile error-mapper smoke test" {
|
||||
vlog -sv fpga/verilog/jls_error_mapper.sv fpga/sim/tb_jls_error_mapper.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run error-mapper smoke test" {
|
||||
vsim -c tb_jls_error_mapper -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile run-mode smoke test" {
|
||||
vlog -sv fpga/verilog/jls_run_mode.sv fpga/sim/tb_jls_run_mode.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run run-mode smoke test" {
|
||||
vsim -c tb_jls_run_mode -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile Golomb encoder smoke test" {
|
||||
vlog -sv fpga/verilog/jls_golomb_encoder.sv fpga/sim/tb_jls_golomb_encoder.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run Golomb encoder smoke test" {
|
||||
vsim -c tb_jls_golomb_encoder -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile bit-packer smoke test" {
|
||||
vlog -sv fpga/verilog/jls_bit_packer.sv fpga/sim/tb_jls_bit_packer.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run bit-packer smoke test" {
|
||||
vsim -c tb_jls_bit_packer -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile byte-arbiter smoke test" {
|
||||
vlog -sv fpga/verilog/jls_byte_arbiter.sv fpga/sim/tb_jls_byte_arbiter.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run byte-arbiter smoke test" {
|
||||
vsim -c tb_jls_byte_arbiter -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile output-buffer smoke test" {
|
||||
vlog -sv fpga/verilog/jls_output_buffer.sv fpga/sim/tb_jls_output_buffer.sv
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "run output-buffer smoke test" {
|
||||
vsim -c tb_jls_output_buffer -do "run -all; quit"
|
||||
}
|
||||
|
||||
Write-Host "[jls-smoke] PASS"
|
||||
90
fpga/sim/run_jls_throughput_regression.ps1
Normal file
90
fpga/sim/run_jls_throughput_regression.ps1
Normal file
@@ -0,0 +1,90 @@
|
||||
param(
|
||||
# Supported grayscale precisions to exercise. Keep the default narrow enough
|
||||
# for a staged run; full regression should pass -BitsList 8,10,12,14,16.
|
||||
[int[]] $BitsList = @(8),
|
||||
|
||||
# Dynamic-NEAR compression targets covered by the hard throughput requirement.
|
||||
[int[]] $Ratios = @(1, 2, 3),
|
||||
|
||||
# SRS default image size used for the formal throughput test.
|
||||
[int] $Width = 6144,
|
||||
[int] $Height = 256,
|
||||
[int] $ImageCount = 10,
|
||||
[int] $ScanRows = 16,
|
||||
|
||||
# PATTERN=9 rotates ten deterministic representative images.
|
||||
[int] $Pattern = 9,
|
||||
|
||||
# Utility switch for script bring-up on tiny images. Formal throughput runs
|
||||
# must leave this unset.
|
||||
[switch] $SkipThroughputCheck
|
||||
)
|
||||
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
function Invoke-CheckedCommand {
    # Runs $Command and echoes everything it printed. Exits the script with
    # the command's exit code when that code is non-zero, and raises an error
    # when a cmdlet inside the block failed or the captured output contains
    # simulator error text.
    #
    # Parameters:
    #   Description - human-readable step name used in log and error messages.
    #   Command     - script block that launches the tool to be checked.
    param(
        [string] $Description,
        [scriptblock] $Command
    )

    Write-Host "[jls-throughput] $Description"

    # Start from a known exit code so a block that never launches a native
    # program cannot inherit a stale (or $null) value from an earlier step.
    $global:LASTEXITCODE = 0

    # With "Stop" in effect, Windows PowerShell turns the first stderr line of
    # a native command redirected with 2>&1 into a terminating error, which
    # would abort the step before its exit code and output can be inspected.
    # Relax the preference only while the command runs.
    $CallerPreference = $ErrorActionPreference
    $ErrorActionPreference = "Continue"
    try {
        $CommandOutput = & $Command 2>&1
    }
    finally {
        $ErrorActionPreference = $CallerPreference
    }
    $CommandExitCode = $LASTEXITCODE

    # Flatten stderr records to their message text so the echo and the
    # pattern checks below see plain lines.
    $CommandText = $CommandOutput | ForEach-Object { "$_" } | Out-String
    $CommandOutput | ForEach-Object { Write-Host $_ }

    # Errors that did not come from a native program's stderr are genuine
    # PowerShell failures; keep them fatal as they were under "Stop".
    foreach ($Item in $CommandOutput) {
        if (($Item -is [System.Management.Automation.ErrorRecord]) -and
            ($Item.FullyQualifiedErrorId -notlike "NativeCommandError*")) {
            throw $Item
        }
    }

    if ($CommandExitCode -ne 0) {
        exit $CommandExitCode
    }
    # "** Error" lines are checked as well as "** Fatal" and the error count,
    # so an error report is caught even when no summary line is printed.
    if (($CommandText -match "\*\* Fatal") -or
        ($CommandText -match "\*\* Error") -or
        ($CommandText -match "Errors[=:]\s*[1-9]")) {
        Write-Error "Command reported simulator errors: $Description"
    }
}
|
||||
|
||||
$OutDir = Join-Path (Resolve-Path ".").Path "tools\jls_compat\out"
|
||||
New-Item -ItemType Directory -Force -Path $OutDir | Out-Null
|
||||
|
||||
$StatsPath = Join-Path $OutDir "rtl_throughput_stats.csv"
|
||||
Set-Content -Encoding ASCII -Path $StatsPath `
|
||||
-Value "case_id,pix_width,pic_col,pic_row,image_count,ratio,pattern,frame_count,output_bytes,input_reads,input_cycles,throughput_mpix_x1000"
|
||||
|
||||
$CheckThroughput = 1
|
||||
if ($SkipThroughputCheck) {
|
||||
$CheckThroughput = 0
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "compile RTL top throughput regression testbench" {
|
||||
vlog -sv -f fpga/verilog/jpeg_ls_rtl.f fpga/sim/tb_jpeg_ls_encoder_top_run_smoke.sv
|
||||
}
|
||||
|
||||
foreach ($Bits in $BitsList) {
|
||||
foreach ($Ratio in $Ratios) {
|
||||
$CaseId = "throughput_${Bits}b_ratio${Ratio}_${Width}x${Height}_${ImageCount}img"
|
||||
$Stem = "rtl_${CaseId}"
|
||||
$JlsPlusArg = "+OUT=tools/jls_compat/out/$Stem.jls"
|
||||
$StatsPlusArg = "+STATS=tools/jls_compat/out/rtl_throughput_stats.csv"
|
||||
$CasePlusArg = "+CASE=$CaseId"
|
||||
$PatternPlusArg = "+PATTERN=$Pattern"
|
||||
$RatioPlusArg = "+RATIO=$Ratio"
|
||||
|
||||
Invoke-CheckedCommand "run $CaseId" {
|
||||
vsim -c tb_jpeg_ls_encoder_top_run_smoke `
|
||||
"-gPIX_WIDTH=$Bits" `
|
||||
"-gPIC_COL=$Width" `
|
||||
"-gPIC_ROW=$Height" `
|
||||
"-gSCAN_ROWS=$ScanRows" `
|
||||
"-gIMAGE_COUNT=$ImageCount" `
|
||||
$PatternPlusArg `
|
||||
$RatioPlusArg `
|
||||
$JlsPlusArg `
|
||||
$StatsPlusArg `
|
||||
$CasePlusArg `
|
||||
+CHECK_THROUGHPUT=$CheckThroughput `
|
||||
-do "run -all; quit"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Write-Host "[jls-throughput] Stats: $StatsPath"
|
||||
Write-Host "[jls-throughput] PASS"
|
||||
163
fpga/sim/run_jls_top_compat_smoke.ps1
Normal file
163
fpga/sim/run_jls_top_compat_smoke.ps1
Normal file
@@ -0,0 +1,163 @@
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
function Invoke-CheckedCommand {
    # Runs $Command and echoes everything it printed. Exits the script with
    # the command's exit code when that code is non-zero, and raises an error
    # when a cmdlet inside the block failed or the captured output contains
    # simulator error text.
    #
    # Parameters:
    #   Description - human-readable step name used in log and error messages.
    #   Command     - script block that launches the tool to be checked.
    param(
        [string] $Description,
        [scriptblock] $Command
    )

    Write-Host "[jls-top-compat] $Description"

    # Start from a known exit code so a block that never launches a native
    # program cannot inherit a stale (or $null) value from an earlier step.
    $global:LASTEXITCODE = 0

    # With "Stop" in effect, Windows PowerShell turns the first stderr line of
    # a native command redirected with 2>&1 into a terminating error, which
    # would abort the step before its exit code and output can be inspected.
    # Relax the preference only while the command runs.
    $CallerPreference = $ErrorActionPreference
    $ErrorActionPreference = "Continue"
    try {
        $CommandOutput = & $Command 2>&1
    }
    finally {
        $ErrorActionPreference = $CallerPreference
    }
    $CommandExitCode = $LASTEXITCODE

    # Flatten stderr records to their message text so the echo and the
    # pattern checks below see plain lines.
    $CommandText = $CommandOutput | ForEach-Object { "$_" } | Out-String
    $CommandOutput | ForEach-Object { Write-Host $_ }

    # Errors that did not come from a native program's stderr are genuine
    # PowerShell failures; keep them fatal as they were under "Stop".
    foreach ($Item in $CommandOutput) {
        if (($Item -is [System.Management.Automation.ErrorRecord]) -and
            ($Item.FullyQualifiedErrorId -notlike "NativeCommandError*")) {
            throw $Item
        }
    }

    if ($CommandExitCode -ne 0) {
        exit $CommandExitCode
    }
    # "** Error" lines are checked as well as "** Fatal" and the error count,
    # so an error report is caught even when no summary line is printed.
    if (($CommandText -match "\*\* Fatal") -or
        ($CommandText -match "\*\* Error") -or
        ($CommandText -match "Errors[=:]\s*[1-9]")) {
        Write-Error "Command reported simulator errors: $Description"
    }
}
|
||||
|
||||
$OutDir = Join-Path (Resolve-Path ".").Path "tools\jls_compat\out"
|
||||
$DepPath = Join-Path (Resolve-Path ".").Path "tools\jls_compat\.deps"
|
||||
|
||||
New-Item -ItemType Directory -Force -Path $OutDir | Out-Null
|
||||
|
||||
function Write-Pgm {
    # Writes a binary (P5) PGM file of $Width x $Height samples to $Path.
    #
    # Parameters:
    #   Path          - destination file; overwritten if it exists.
    #   Pattern       - 1: sample = raster index masked to the maximum value;
    #                   2: checkerboard of 0 / maximum value (maximum where
    #                      the column and row parities differ);
    #                   anything else: all-zero image.
    #   BitsPerSample - sample precision; the maximum value is 2^bits - 1.
    #                   Above 8 bits each sample is stored as two bytes,
    #                   most significant byte first.
    #   Width, Height - image dimensions in samples.
    param(
        [string] $Path,
        [int] $Pattern,
        [int] $BitsPerSample,
        [int] $Width,
        [int] $Height
    )

    $MaxValue = (1 -shl $BitsPerSample) - 1
    $HeaderBytes = [System.Text.Encoding]::ASCII.GetBytes("P5`n$Width $Height`n$MaxValue`n")

    $TwoByteSamples = $BitsPerSample -gt 8
    $BytesPerSample = 1
    if ($TwoByteSamples) {
        $BytesPerSample = 2
    }

    # Header and raster share one buffer; samples are written straight after
    # the header in raster order.
    $FileBytes = [byte[]]::new($HeaderBytes.Length + ($Width * $Height * $BytesPerSample))
    [System.Array]::Copy($HeaderBytes, 0, $FileBytes, 0, $HeaderBytes.Length)
    $WriteAt = $HeaderBytes.Length

    for ($Row = 0; $Row -lt $Height; $Row++) {
        for ($Col = 0; $Col -lt $Width; $Col++) {
            $Sample = 0
            if ($Pattern -eq 1) {
                $Sample = (($Row * $Width) + $Col) -band $MaxValue
            } elseif ($Pattern -eq 2) {
                if (($Col -band 1) -ne ($Row -band 1)) {
                    $Sample = $MaxValue
                }
            }

            if ($TwoByteSamples) {
                $FileBytes[$WriteAt] = [byte] (($Sample -shr 8) -band 255)
                $FileBytes[$WriteAt + 1] = [byte] ($Sample -band 255)
            } else {
                $FileBytes[$WriteAt] = [byte] $Sample
            }
            $WriteAt += $BytesPerSample
        }
    }

    [System.IO.File]::WriteAllBytes($Path, $FileBytes)
}
|
||||
|
||||
Invoke-CheckedCommand "compile RTL top compatibility smoke" {
|
||||
vlog -sv -f fpga/verilog/jpeg_ls_rtl.f fpga/sim/tb_jpeg_ls_encoder_top_run_smoke.sv
|
||||
}
|
||||
|
||||
$env:JLS_COMPAT_PYDEPS = $DepPath
|
||||
|
||||
foreach ($Bits in @(8, 10, 12, 14, 16)) {
|
||||
foreach ($Pattern in @(0, 1)) {
|
||||
$PatternName = "zero"
|
||||
if ($Pattern -eq 1) {
|
||||
$PatternName = "ramp"
|
||||
}
|
||||
|
||||
$Stem = "rtl_top_${PatternName}_${Bits}b"
|
||||
$JlsPath = Join-Path $OutDir "$Stem.jls"
|
||||
$PgmPath = Join-Path $OutDir "$Stem.pgm"
|
||||
$JlsPlusArg = "+OUT=tools/jls_compat/out/$Stem.jls"
|
||||
$PatternPlusArg = "+PATTERN=$Pattern"
|
||||
$WidthGeneric = "-gPIX_WIDTH=$Bits"
|
||||
|
||||
Write-Pgm $PgmPath $Pattern $Bits 16 16
|
||||
|
||||
Invoke-CheckedCommand "run RTL top compatibility smoke: ${Bits}b $PatternName image" {
|
||||
vsim -c tb_jpeg_ls_encoder_top_run_smoke $WidthGeneric $PatternPlusArg $JlsPlusArg -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "decode ${Bits}b $PatternName RTL output with CharLS and compare reference PGM" {
|
||||
python tools/jls_compat/reference_decode_compare.py $JlsPath --reference-pgm $PgmPath --skip-libjpeg
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$CheckerStem = "rtl_top_checker_8b"
|
||||
$CheckerJlsPath = Join-Path $OutDir "$CheckerStem.jls"
|
||||
$CheckerPgmPath = Join-Path $OutDir "$CheckerStem.pgm"
|
||||
Write-Pgm $CheckerPgmPath 2 8 16 16
|
||||
|
||||
Invoke-CheckedCommand "run RTL top compatibility smoke: 8b checker image" {
|
||||
vsim -c tb_jpeg_ls_encoder_top_run_smoke -gPIX_WIDTH=8 +PATTERN=2 +OUT=tools/jls_compat/out/$CheckerStem.jls -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "decode 8b checker RTL output with CharLS and compare reference PGM" {
|
||||
python tools/jls_compat/reference_decode_compare.py $CheckerJlsPath --reference-pgm $CheckerPgmPath --skip-libjpeg
|
||||
}
|
||||
|
||||
$MultiStripStem = "rtl_top_ramp_8b_2strip"
|
||||
$MultiStripJlsPath = Join-Path $OutDir "$MultiStripStem.jls"
|
||||
$MultiStripPgmPath = Join-Path $OutDir "$MultiStripStem.pgm"
|
||||
Write-Pgm $MultiStripPgmPath 1 8 16 32
|
||||
|
||||
Invoke-CheckedCommand "run RTL top compatibility smoke: 8b ramp two-strip image" {
|
||||
vsim -c tb_jpeg_ls_encoder_top_run_smoke -gPIX_WIDTH=8 -gPIC_ROW=32 +PATTERN=1 +OUT=tools/jls_compat/out/$MultiStripStem.jls -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "decode 8b ramp two-strip RTL output with CharLS and compare reference PGM" {
|
||||
python tools/jls_compat/reference_decode_compare.py $MultiStripJlsPath --reference-pgm $MultiStripPgmPath --skip-libjpeg --split-frames --expected-frames 2
|
||||
}
|
||||
|
||||
$MultiImageStem = "rtl_top_zero_8b_2image"
|
||||
$MultiImageJlsPath = Join-Path $OutDir "$MultiImageStem.jls"
|
||||
$MultiImagePgmPath = Join-Path $OutDir "$MultiImageStem.pgm"
|
||||
Write-Pgm $MultiImagePgmPath 0 8 16 32
|
||||
|
||||
Invoke-CheckedCommand "run RTL top compatibility smoke: 8b zero two-image stream" {
|
||||
vsim -c tb_jpeg_ls_encoder_top_run_smoke -gPIX_WIDTH=8 -gIMAGE_COUNT=2 +PATTERN=0 +OUT=tools/jls_compat/out/$MultiImageStem.jls -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "decode 8b zero two-image RTL output with CharLS and compare stitched reference PGM" {
|
||||
python tools/jls_compat/reference_decode_compare.py $MultiImageJlsPath --reference-pgm $MultiImagePgmPath --skip-libjpeg --split-frames --expected-frames 2
|
||||
}
|
||||
|
||||
$NearStripStem = "rtl_top_ramp_8b_2strip_ratio2"
|
||||
$NearStripJlsPath = Join-Path $OutDir "$NearStripStem.jls"
|
||||
$NearStripPgmPath = Join-Path $OutDir "$NearStripStem.pgm"
|
||||
Write-Pgm $NearStripPgmPath 1 8 16 32
|
||||
|
||||
Invoke-CheckedCommand "run RTL top compatibility smoke: 8b ramp two-strip ratio=2 image" {
|
||||
vsim -c tb_jpeg_ls_encoder_top_run_smoke -gPIX_WIDTH=8 -gPIC_ROW=32 +PATTERN=1 +RATIO=2 +OUT=tools/jls_compat/out/$NearStripStem.jls -do "run -all; quit"
|
||||
}
|
||||
|
||||
Invoke-CheckedCommand "decode 8b ramp two-strip ratio=2 RTL output with CharLS and compare reference PGM within NEAR bound" {
|
||||
python tools/jls_compat/reference_decode_compare.py $NearStripJlsPath --reference-pgm $NearStripPgmPath --skip-libjpeg --split-frames --expected-frames 2 --max-abs-diff 31
|
||||
}
|
||||
|
||||
Write-Host "[jls-top-compat] PASS"
|
||||
199
fpga/sim/tb_jls_bit_packer.sv
Normal file
199
fpga/sim/tb_jls_bit_packer.sv
Normal file
@@ -0,0 +1,199 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.1-C.4 entropy-coded segment syntax
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : JPEG-LS bitstream packing and marker/zero-bit stuffing
|
||||
// Example : Checks FF 7F stuffing and partial-byte flush behavior.
|
||||
//
|
||||
// Smoke test for jls_bit_packer.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_bit_packer;
|
||||
|
||||
// Code-event width used by the smoke test.
|
||||
localparam int MAX_CODE_BITS = 64;
|
||||
|
||||
// Expected byte count:
|
||||
// A5, FF, 7F, A0, FF, 00
|
||||
localparam int EXPECTED_BYTE_COUNT = 6;
|
||||
|
||||
// Main simulation clock and reset.
|
||||
logic clk;
|
||||
logic rst;
|
||||
|
||||
// Code input interface.
|
||||
logic code_valid;
|
||||
logic code_ready;
|
||||
logic [MAX_CODE_BITS-1:0] code_bits;
|
||||
logic [6:0] code_bit_count;
|
||||
|
||||
// Flush interface.
|
||||
logic flush_valid;
|
||||
logic flush_ready;
|
||||
logic flush_done;
|
||||
|
||||
// Byte output interface.
|
||||
logic byte_valid;
|
||||
logic byte_ready;
|
||||
logic [7:0] byte_data;
|
||||
|
||||
// Scoreboard state.
|
||||
logic [7:0] expected_byte_mem [0:EXPECTED_BYTE_COUNT-1];
|
||||
int receive_index;
|
||||
int flush_done_count;
|
||||
logic done_seen;
|
||||
|
||||
jls_bit_packer #(
|
||||
.MAX_CODE_BITS(MAX_CODE_BITS)
|
||||
) dut (
|
||||
.clk(clk),
|
||||
.rst(rst),
|
||||
.code_valid(code_valid),
|
||||
.code_ready(code_ready),
|
||||
.code_bits(code_bits),
|
||||
.code_bit_count(code_bit_count),
|
||||
.flush_valid(flush_valid),
|
||||
.flush_ready(flush_ready),
|
||||
.flush_done(flush_done),
|
||||
.byte_valid(byte_valid),
|
||||
.byte_ready(byte_ready),
|
||||
.byte_data(byte_data)
|
||||
);
|
||||
|
||||
always begin
|
||||
#2 clk = ~clk;
|
||||
end
|
||||
|
||||
initial begin
|
||||
expected_byte_mem[0] = 8'hA5;
|
||||
expected_byte_mem[1] = 8'hFF;
|
||||
expected_byte_mem[2] = 8'h7F;
|
||||
expected_byte_mem[3] = 8'hA0;
|
||||
expected_byte_mem[4] = 8'hFF;
|
||||
expected_byte_mem[5] = 8'h00;
|
||||
end
|
||||
|
||||
// Drives one code word into the packer input.
//   word      : left-aligned code bits, copied to code_bits
//   bit_count : value driven on code_bit_count
// Sequence: wait for code_ready, advance to the next rising clock edge, raise
// code_valid with the word, hold it until the following rising edge, then
// return code_valid/code_bits/code_bit_count to their idle zero values.
// NOTE(review): stimulus is driven with blocking assignments right after
// @(posedge clk); whether the DUT samples it on that edge or the next one
// depends on simulator event ordering -- confirm against the DUT.
task automatic tb_send_code_word(
    input logic [63:0] word,
    input logic [6:0]  bit_count
);
    wait (code_ready);
    @(posedge clk);
    code_valid     = 1'b1;
    code_bits      = word;
    code_bit_count = bit_count;
    @(posedge clk);
    code_valid     = 1'b0;
    code_bits      = 64'h0000_0000_0000_0000;
    code_bit_count = 7'd0;
endtask

// Requests a flush: waits for flush_ready, then holds flush_valid high
// between two consecutive rising clock edges.
task automatic tb_pulse_flush();
    wait (flush_ready);
    @(posedge clk);
    flush_valid = 1'b1;
    @(posedge clk);
    flush_valid = 1'b0;
endtask

// Main stimulus: reset, four code words, a flush, one more code word, and a
// second flush.
initial begin
    // Idle defaults while reset is asserted.
    clk            = 1'b0;
    rst            = 1'b1;
    code_valid     = 1'b0;
    code_bits      = {MAX_CODE_BITS{1'b0}};
    code_bit_count = 7'd0;
    flush_valid    = 1'b0;
    byte_ready     = 1'b1;

    // Release reset after five rising clock edges.
    repeat (5) @(posedge clk);
    rst = 1'b0;

    // First burst: 8 + 8 + 7 + 4 bits, then flush.
    tb_send_code_word(64'hA500_0000_0000_0000, 7'd8);
    tb_send_code_word(64'hFF00_0000_0000_0000, 7'd8);
    tb_send_code_word(64'hFE00_0000_0000_0000, 7'd7);
    tb_send_code_word(64'hA000_0000_0000_0000, 7'd4);
    tb_pulse_flush();

    // Do not start the second burst until the scoreboard has counted the
    // first flush_done.
    wait (flush_done_count == 1);

    // Second burst: a single 8-bit 0xFF word, then flush.
    tb_send_code_word(64'hFF00_0000_0000_0000, 7'd8);
    tb_pulse_flush();
end
|
||||
|
||||
// Scoreboard: compares every accepted output byte against expected_byte_mem
// and counts flush_done events. done_seen is raised on the flush_done that
// arrives while flush_done_count is already 1 (i.e. the second one).
always_ff @(posedge clk) begin
    if (rst) begin
        receive_index    <= 0;
        flush_done_count <= 0;
        done_seen        <= 1'b0;
    end else begin
        // A byte transfers when valid and ready are both high.
        if (byte_valid && byte_ready) begin
            // More bytes than the expected table holds is an error.
            if (receive_index >= EXPECTED_BYTE_COUNT)
                $fatal(1, "Unexpected extra packed byte 0x%02h", byte_data);

            // 4-state compare so X/Z on byte_data is also reported.
            if (byte_data !== expected_byte_mem[receive_index])
                $fatal(1, "packed byte mismatch at %0d: got 0x%02h expected 0x%02h",
                       receive_index, byte_data, expected_byte_mem[receive_index]);

            receive_index <= receive_index + 1;
        end

        if (flush_done)
            flush_done_count <= flush_done_count + 1;

        // flush_done_count still holds its pre-increment value here.
        if (flush_done && flush_done_count == 1)
            done_seen <= 1'b1;
    end
end
|
||||
|
||||
// Watchdog: abort the simulation if it is still running after 2000 rising
// clock edges.
initial begin
    for (int cycle = 0; cycle < 2000; cycle++) begin
        @(posedge clk);
    end
    $fatal(1, "Timeout waiting for bit packer smoke");
end

// Completion: once the scoreboard raises done_seen, wait four more rising
// edges, confirm the final byte and flush counts, then report PASS.
initial begin
    wait (done_seen);
    repeat (4) @(posedge clk);

    // Pass requires exactly EXPECTED_BYTE_COUNT bytes and exactly two flushes.
    if (!(receive_index === EXPECTED_BYTE_COUNT && flush_done_count === 2)) begin
        $fatal(1, "bit packer smoke count mismatch");
    end

    $display("PASS: tb_jls_bit_packer");
    $finish;
end
|
||||
|
||||
endmodule
|
||||
|
||||
`default_nettype wire
|
||||
78
fpga/sim/tb_jls_byte_arbiter.sv
Normal file
78
fpga/sim/tb_jls_byte_arbiter.sv
Normal file
@@ -0,0 +1,78 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.1-C.4 marker stream byte order
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Marker bytes and entropy-coded bytes in stream order
|
||||
// Example : Header byte has priority over a waiting payload byte.
|
||||
//
|
||||
// Smoke test for jls_byte_arbiter.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_byte_arbiter;

    // Header source interface.
    logic       header_valid;
    logic       header_ready;
    logic [7:0] header_data;
    logic       header_original_image_start;

    // Payload source interface.
    logic       payload_valid;
    logic       payload_ready;
    logic [7:0] payload_data;

    // Arbitrated output byte interface.
    logic       byte_valid;
    logic       byte_ready;
    logic [7:0] byte_data;
    logic       original_image_start;

    jls_byte_arbiter dut (
        .header_valid(header_valid),
        .header_ready(header_ready),
        .header_data(header_data),
        .header_original_image_start(header_original_image_start),
        .payload_valid(payload_valid),
        .payload_ready(payload_ready),
        .payload_data(payload_data),
        .byte_valid(byte_valid),
        .byte_ready(byte_ready),
        .byte_data(byte_data),
        .original_image_start(original_image_start)
    );

    // The DUT is exercised without a clock: each scenario changes the inputs,
    // waits #1 for the outputs to settle, and then checks them.
    //
    // Every flag is compared with the 4-state operator !== against an explicit
    // 1'b0/1'b1. A plain truthiness test (`!sig` / `sig`) evaluates to X when
    // the signal is X or Z, and an `if` treats X as false, so an undriven or
    // unknown valid/ready output would otherwise pass unnoticed.
    initial begin
        header_valid                = 1'b0;
        header_data                 = 8'h00;
        header_original_image_start = 1'b0;
        payload_valid               = 1'b0;
        payload_data                = 8'h00;
        byte_ready                  = 1'b1;

        // Scenario 1: only the payload source offers a byte.
        #1;
        payload_valid = 1'b1;
        payload_data  = 8'h55;
        #1;
        if (byte_valid !== 1'b1 || payload_ready !== 1'b1 || header_ready !== 1'b0 ||
            byte_data !== 8'h55 || original_image_start !== 1'b0) begin
            $fatal(1, "payload-only arbitration mismatch");
        end

        // Scenario 2: the header source also offers a byte while the payload
        // is still waiting; the header byte and its start flag must win.
        header_valid                = 1'b1;
        header_data                 = 8'hFF;
        header_original_image_start = 1'b1;
        #1;
        if (byte_valid !== 1'b1 || header_ready !== 1'b1 || payload_ready !== 1'b0 ||
            byte_data !== 8'hFF || original_image_start !== 1'b1) begin
            $fatal(1, "header-priority arbitration mismatch");
        end

        // Scenario 3: downstream stalls; neither source may see ready.
        byte_ready = 1'b0;
        #1;
        if (header_ready !== 1'b0 || payload_ready !== 1'b0) begin
            $fatal(1, "ready should be low when downstream stalls");
        end

        $display("PASS: tb_jls_byte_arbiter");
        $finish;
    end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
129
fpga/sim/tb_jls_coding_params.sv
Normal file
129
fpga/sim/tb_jls_coding_params.sv
Normal file
@@ -0,0 +1,129 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.2 initialization, Annex G.2 variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : RANGE, qbpp, and LIMIT derivation from MAXVAL and NEAR
|
||||
// Example : Checks representative PIX_WIDTH and NEAR table entries.
|
||||
//
|
||||
// Smoke test for jls_coding_params.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_coding_params;

    // NEAR stimulus, one independent signal per parameterized instance.
    logic [5:0] near_8, near_10, near_12, near_14, near_16;

    // DUT outputs, one set per instance.
    logic [16:0] range_8, range_10, range_12, range_14, range_16;
    logic [4:0]  qbpp_8,  qbpp_10,  qbpp_12,  qbpp_14,  qbpp_16;
    logic [6:0]  limit_8, limit_10, limit_12, limit_14, limit_16;

    // One instance per tested PIX_WIDTH.
    jls_coding_params #(.PIX_WIDTH(8)) dut_8 (
        .NEAR(near_8), .RANGE(range_8), .qbpp(qbpp_8), .LIMIT(limit_8)
    );

    jls_coding_params #(.PIX_WIDTH(10)) dut_10 (
        .NEAR(near_10), .RANGE(range_10), .qbpp(qbpp_10), .LIMIT(limit_10)
    );

    jls_coding_params #(.PIX_WIDTH(12)) dut_12 (
        .NEAR(near_12), .RANGE(range_12), .qbpp(qbpp_12), .LIMIT(limit_12)
    );

    jls_coding_params #(.PIX_WIDTH(14)) dut_14 (
        .NEAR(near_14), .RANGE(range_14), .qbpp(qbpp_14), .LIMIT(limit_14)
    );

    jls_coding_params #(.PIX_WIDTH(16)) dut_16 (
        .NEAR(near_16), .RANGE(range_16), .qbpp(qbpp_16), .LIMIT(limit_16)
    );

    // No clock is used: NEAR values are applied, #1 lets the outputs settle,
    // and each instance is compared against its expected RANGE/qbpp/LIMIT.
    // All comparisons are 4-state exact, so X/Z outputs count as mismatches.
    initial begin
        // First vector set.
        near_8  = 6'd0;
        near_10 = 6'd16;
        near_12 = 6'd31;
        near_14 = 6'd8;
        near_16 = 6'd0;
        #1;

        if (!(range_8 === 17'd256 && qbpp_8 === 5'd8 && limit_8 === 7'd32)) begin
            $fatal(1, "8-bit NEAR=0 coding params mismatch");
        end

        if (!(range_10 === 17'd32 && qbpp_10 === 5'd5 && limit_10 === 7'd40)) begin
            $fatal(1, "10-bit NEAR=16 coding params mismatch");
        end

        if (!(range_12 === 17'd66 && qbpp_12 === 5'd7 && limit_12 === 7'd48)) begin
            $fatal(1, "12-bit NEAR=31 coding params mismatch");
        end

        if (!(range_14 === 17'd965 && qbpp_14 === 5'd10 && limit_14 === 7'd56)) begin
            $fatal(1, "14-bit NEAR=8 coding params mismatch");
        end

        if (!(range_16 === 17'd65536 && qbpp_16 === 5'd16 && limit_16 === 7'd64)) begin
            $fatal(1, "16-bit NEAR=0 coding params mismatch");
        end

        // Second vector set: only the 8-bit and 16-bit instances change.
        // near_8 = 63 is the "defensive clamp" case named in the failure
        // message below; the expected RANGE for it is 6.
        near_8  = 6'd63;
        near_16 = 6'd31;
        #1;

        if (!(range_8 === 17'd6 && qbpp_8 === 5'd3 && limit_8 === 7'd32)) begin
            $fatal(1, "8-bit defensive NEAR clamp mismatch");
        end

        if (!(range_16 === 17'd1042 && qbpp_16 === 5'd11 && limit_16 === 7'd64)) begin
            $fatal(1, "16-bit NEAR=31 coding params mismatch");
        end

        $display("PASS: tb_jls_coding_params");
        $finish;
    end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
193
fpga/sim/tb_jls_context_memory.sv
Normal file
193
fpga/sim/tb_jls_context_memory.sv
Normal file
@@ -0,0 +1,193 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.2 initialization, Annex A.6 variables update
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Regular-mode context arrays A/B/C/N
|
||||
// Example : RANGE=256 initializes A to 4 for all 365 contexts.
|
||||
//
|
||||
// Smoke test for jls_context_memory.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_context_memory;

    // Clock and reset.
    logic clk, rst;

    // Initialization interface.
    logic        init_valid, init_ready;
    logic [16:0] init_RANGE;
    logic        init_busy, init_done;

    // Read request / read result interface.
    logic               read_valid, read_ready;
    logic [8:0]         read_context_index;
    logic               read_result_valid, read_result_ready;
    logic [8:0]         read_result_context_index;
    logic [31:0]        read_A;
    logic signed [31:0] read_B;
    logic signed [8:0]  read_C;
    logic [15:0]        read_N;

    // Write interface.
    logic               write_valid, write_ready;
    logic [8:0]         write_context_index;
    logic [31:0]        write_A;
    logic signed [31:0] write_B;
    logic signed [8:0]  write_C;
    logic [15:0]        write_N;

    // Raised by the stimulus process once every check has passed.
    logic all_done_seen;

    jls_context_memory dut (
        .clk(clk), .rst(rst),
        .init_valid(init_valid), .init_ready(init_ready),
        .init_RANGE(init_RANGE),
        .init_busy(init_busy), .init_done(init_done),
        .read_valid(read_valid), .read_ready(read_ready),
        .read_context_index(read_context_index),
        .read_result_valid(read_result_valid),
        .read_result_ready(read_result_ready),
        .read_result_context_index(read_result_context_index),
        .read_A(read_A), .read_B(read_B), .read_C(read_C), .read_N(read_N),
        .write_valid(write_valid), .write_ready(write_ready),
        .write_context_index(write_context_index),
        .write_A(write_A), .write_B(write_B),
        .write_C(write_C), .write_N(write_N)
    );

    // Free-running clock, toggling every 2 time units.
    always #2 clk = ~clk;

    // Requests an initialization with the given RANGE: waits for init_ready,
    // then holds init_valid high between two consecutive rising edges.
    task automatic request_init(input logic [16:0] range_value);
        wait (init_ready);
        @(posedge clk);
        init_valid = 1'b1;
        init_RANGE = range_value;
        @(posedge clk);
        init_valid = 1'b0;
    endtask

    // Issues a read for one context index, holds read_valid until the next
    // rising edge, and returns as soon as read_result_valid is high.
    // NOTE(review): the caller samples the result in the same time step in
    // which read_result_valid rises, with no settle delay (unlike the #1 used
    // in tb_jls_context_model) -- confirm this is safe for the DUT's outputs.
    task automatic issue_read(input logic [8:0] index);
        read_context_index = index;
        read_valid         = 1'b1;
        @(posedge clk);
        read_valid = 1'b0;
        wait (read_result_valid);
    endtask

    initial begin
        // Idle defaults while reset is asserted.
        clk                 = 1'b0;
        rst                 = 1'b1;
        init_valid          = 1'b0;
        init_RANGE          = 17'd0;
        read_valid          = 1'b0;
        read_context_index  = 9'd0;
        read_result_ready   = 1'b1;
        write_valid         = 1'b0;
        write_context_index = 9'd0;
        write_A             = 32'd0;
        write_B             = 32'sd0;
        write_C             = 9'sd0;
        write_N             = 16'd0;
        all_done_seen       = 1'b0;

        repeat (5) @(posedge clk);
        rst = 1'b0;

        // First initialization with RANGE=256. init_RANGE is changed to 65536
        // right after the request; the reads below still expect A=4.
        // NOTE(review): presumably this checks that RANGE is captured during
        // the handshake -- confirm against the DUT.
        request_init(17'd256);
        init_RANGE = 17'd65536;

        wait (init_done);
        repeat (2) @(posedge clk);

        // Context 0 must hold the initial values.
        issue_read(9'd0);
        if (!(read_result_context_index === 9'd0 &&
              read_A === 32'd4 && read_B === 32'sd0 &&
              read_C === 9'sd0 && read_N === 16'd1)) begin
            $fatal(1, "context 0 init read mismatch");
        end
        @(posedge clk);

        // Context 364 (the highest index read by this test) must match too.
        issue_read(9'd364);
        if (!(read_result_context_index === 9'd364 &&
              read_A === 32'd4 && read_B === 32'sd0 &&
              read_C === 9'sd0 && read_N === 16'd1)) begin
            $fatal(1, "context 364 init read mismatch");
        end
        @(posedge clk);

        // Overwrite context 5, then start reading it back on the same edge
        // that de-asserts write_valid.
        wait (write_ready);
        @(posedge clk);
        write_valid         = 1'b1;
        write_context_index = 9'd5;
        write_A             = 32'd7;
        write_B             = -32'sd2;
        write_C             = 9'sd1;
        write_N             = 16'd3;
        @(posedge clk);
        write_valid = 1'b0;

        issue_read(9'd5);
        if (!(read_result_context_index === 9'd5 &&
              read_A === 32'd7 && read_B === -32'sd2 &&
              read_C === 9'sd1 && read_N === 16'd3)) begin
            $fatal(1, "context 5 write/read mismatch");
        end
        @(posedge clk);

        // Second initialization with RANGE=65536: context 5 must be reset and
        // A must now read 1024.
        request_init(17'd65536);
        wait (init_done);
        repeat (2) @(posedge clk);

        issue_read(9'd5);
        if (!(read_result_context_index === 9'd5 &&
              read_A === 32'd1024 && read_B === 32'sd0 &&
              read_C === 9'sd0 && read_N === 16'd1)) begin
            $fatal(1, "context 5 re-init mismatch");
        end

        all_done_seen = 1'b1;
    end

    // Watchdog: fail if the test has not finished after 3000 rising edges.
    initial begin
        repeat (3000) @(posedge clk);
        $fatal(1, "Timeout waiting for context memory smoke");
    end

    // Report PASS four rising edges after the stimulus process completes.
    initial begin
        wait (all_done_seen);
        repeat (4) @(posedge clk);
        $display("PASS: tb_jls_context_memory");
        $finish;
    end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
286
fpga/sim/tb_jls_context_model.sv
Normal file
286
fpga/sim/tb_jls_context_model.sv
Normal file
@@ -0,0 +1,286 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.2 initialization, Annex A.3 context determination
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Read regular-mode context variables A/B/C/N for context Q
|
||||
// Example : Reads context 5 before and after writeback.
|
||||
//
|
||||
// Smoke test for jls_context_model.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_context_model;

    // Sample precision used by this test.
    localparam int PIX_WIDTH = 8;

    // Clock and reset.
    logic clk, rst;

    // Initialization interface.
    logic        init_valid, init_ready;
    logic [16:0] init_RANGE;
    logic        init_busy, init_done;

    // Context request driven into the DUT.
    logic                 context_valid, context_ready;
    logic [PIX_WIDTH-1:0] context_sample;
    logic [12:0]          context_x, context_y;
    logic                 context_strip_first_pixel;
    logic                 context_strip_last_pixel;
    logic [PIX_WIDTH-1:0] context_Px;
    logic [8:0]           context_index;
    logic                 context_negative;
    logic                 run_mode_context;

    // Context variables returned by the DUT.
    logic                 context_vars_valid, context_vars_ready;
    logic [PIX_WIDTH-1:0] vars_sample;
    logic [12:0]          vars_x, vars_y;
    logic                 vars_strip_first_pixel;
    logic                 vars_strip_last_pixel;
    logic [PIX_WIDTH-1:0] vars_Px;
    logic [8:0]           vars_context_index;
    logic                 vars_context_negative;
    logic                 vars_run_mode_context;
    logic [31:0]          vars_A;
    logic signed [31:0]   vars_B;
    logic signed [8:0]    vars_C;
    logic [15:0]          vars_N;

    // Writeback interface.
    logic               write_valid, write_ready;
    logic [8:0]         write_context_index;
    logic [31:0]        write_A;
    logic signed [31:0] write_B;
    logic signed [8:0]  write_C;
    logic [15:0]        write_N;

    // Raised by the stimulus process once every check has passed.
    logic all_done_seen;

    jls_context_model #(.PIX_WIDTH(PIX_WIDTH)) dut (
        .clk(clk), .rst(rst),
        .init_valid(init_valid), .init_ready(init_ready),
        .init_RANGE(init_RANGE),
        .init_busy(init_busy), .init_done(init_done),
        .context_valid(context_valid), .context_ready(context_ready),
        .context_sample(context_sample),
        .context_x(context_x), .context_y(context_y),
        .context_strip_first_pixel(context_strip_first_pixel),
        .context_strip_last_pixel(context_strip_last_pixel),
        .context_Px(context_Px),
        .context_index(context_index),
        .context_negative(context_negative),
        .run_mode_context(run_mode_context),
        .context_vars_valid(context_vars_valid),
        .context_vars_ready(context_vars_ready),
        .vars_sample(vars_sample),
        .vars_x(vars_x), .vars_y(vars_y),
        .vars_strip_first_pixel(vars_strip_first_pixel),
        .vars_strip_last_pixel(vars_strip_last_pixel),
        .vars_Px(vars_Px),
        .vars_context_index(vars_context_index),
        .vars_context_negative(vars_context_negative),
        .vars_run_mode_context(vars_run_mode_context),
        .vars_A(vars_A), .vars_B(vars_B), .vars_C(vars_C), .vars_N(vars_N),
        .write_valid(write_valid), .write_ready(write_ready),
        .write_context_index(write_context_index),
        .write_A(write_A), .write_B(write_B),
        .write_C(write_C), .write_N(write_N)
    );

    // Free-running clock, toggling every 2 time units.
    always #2 clk = ~clk;

    // Presents one context request. The fields are driven on a falling clock
    // edge, context_valid is held until the first rising edge at or after
    // context_ready is seen high, and then context_valid and the
    // first-pixel flag are dropped. run_mode_context is left at the value set
    // during initialization (0); context_strip_last_pixel is never changed.
    task automatic issue_context(
        input logic [PIX_WIDTH-1:0] sample_value,
        input logic [12:0]          x_value,
        input logic [12:0]          y_value,
        input logic [PIX_WIDTH-1:0] px_value,
        input logic [8:0]           index_value,
        input logic                 negative_value,
        input logic                 first_pixel_value
    );
        @(negedge clk);
        context_valid             = 1'b1;
        context_sample            = sample_value;
        context_x                 = x_value;
        context_y                 = y_value;
        context_strip_first_pixel = first_pixel_value;
        context_Px                = px_value;
        context_index             = index_value;
        context_negative          = negative_value;
        wait (context_ready);
        @(posedge clk);
        context_valid             = 1'b0;
        context_strip_first_pixel = 1'b0;
    endtask

    // Drives one writeback record and drops write_valid at the next rising
    // edge. The caller chooses the point in time at which the drive starts.
    task automatic drive_write(
        input logic [8:0]         index_value,
        input logic [31:0]        a_value,
        input logic signed [31:0] b_value,
        input logic signed [8:0]  c_value,
        input logic [15:0]        n_value
    );
        write_valid         = 1'b1;
        write_context_index = index_value;
        write_A             = a_value;
        write_B             = b_value;
        write_C             = c_value;
        write_N             = n_value;
        @(posedge clk);
        write_valid = 1'b0;
    endtask

    initial begin
        // Idle defaults while reset is asserted.
        clk                       = 1'b0;
        rst                       = 1'b1;
        init_valid                = 1'b0;
        init_RANGE                = 17'd0;
        context_valid             = 1'b0;
        context_sample            = 8'd0;
        context_x                 = 13'd0;
        context_y                 = 13'd0;
        context_strip_first_pixel = 1'b0;
        context_strip_last_pixel  = 1'b0;
        context_Px                = 8'd0;
        context_index             = 9'd0;
        context_negative          = 1'b0;
        run_mode_context          = 1'b0;
        context_vars_ready        = 1'b1;
        write_valid               = 1'b0;
        write_context_index       = 9'd0;
        write_A                   = 32'd0;
        write_B                   = 32'sd0;
        write_C                   = 9'sd0;
        write_N                   = 16'd0;
        all_done_seen             = 1'b0;

        repeat (5) @(posedge clk);
        rst = 1'b0;

        // Initialize the context variables with RANGE=256.
        wait (init_ready);
        @(posedge clk);
        init_valid = 1'b1;
        init_RANGE = 17'd256;
        @(posedge clk);
        init_valid = 1'b0;
        wait (init_done);
        repeat (2) @(posedge clk);

        // Read 1: context 5 straight after init, flagged as first pixel.
        // The request fields must come back alongside the initial A/B/C/N.
        issue_context(8'd33, 13'd2, 13'd3, 8'd31, 9'd5, 1'b0, 1'b1);

        wait (context_vars_valid);
        #1;  // let the result fields settle before sampling
        if (!(vars_context_index === 9'd5 && vars_sample === 8'd33 &&
              vars_x === 13'd2 && vars_y === 13'd3 &&
              vars_Px === 8'd31 && vars_A === 32'd4 &&
              vars_B === 32'sd0 && vars_C === 9'sd0 && vars_N === 16'd1 &&
              vars_strip_first_pixel === 1'b1)) begin
            $fatal(1, "first context_model read mismatch");
        end
        wait (!context_vars_valid);
        repeat (2) @(posedge clk);

        // Write new variables back into context 5.
        wait (write_ready);
        @(posedge clk);
        drive_write(9'd5, 32'd7, -32'sd2, 9'sd3, 16'd4);

        // Read 2: context 5 again; the written values must be returned and
        // the negative flag must be passed through.
        issue_context(8'd44, 13'd4, 13'd5, 8'd40, 9'd5, 1'b1, 1'b0);

        wait (context_vars_valid && vars_sample == 8'd44);
        #1;
        if (!(vars_context_index === 9'd5 && vars_sample === 8'd44 &&
              vars_x === 13'd4 && vars_y === 13'd5 &&
              vars_Px === 8'd40 && vars_A === 32'd7 &&
              vars_B === -32'sd2 && vars_C === 9'sd3 && vars_N === 16'd4 &&
              vars_context_negative === 1'b1)) begin
            $fatal(1, "second context_model read mismatch");
        end
        wait (!context_vars_valid);
        repeat (2) @(posedge clk);

        // Read 3: context 6, never written, still holds the initial values.
        issue_context(8'd55, 13'd6, 13'd7, 8'd50, 9'd6, 1'b0, 1'b0);

        wait (context_vars_valid && vars_sample == 8'd55);
        #1;
        if (!(vars_context_index === 9'd6 && vars_A === 32'd4 &&
              vars_B === 32'sd0 && vars_C === 9'sd0 && vars_N === 16'd1)) begin
            $fatal(1, "third context_model read mismatch");
        end
        wait (!context_vars_valid);
        repeat (2) @(posedge clk);

        // Read 4: context 6 again with no writeback since read 3. The result
        // must not appear within the next two rising edges.
        issue_context(8'd66, 13'd8, 13'd9, 8'd60, 9'd6, 1'b0, 1'b0);
        repeat (2) @(posedge clk);
        if (context_vars_valid && vars_sample == 8'd66) begin
            $fatal(1, "same-context hazard read returned before writeback");
        end

        // Supply the pending writeback for context 6 from a falling edge.
        @(negedge clk);
        drive_write(9'd6, 32'd11, -32'sd5, 9'sd4, 16'd8);

        // The held-back read must now return the values just written.
        wait (context_vars_valid && vars_sample == 8'd66);
        #1;
        if (!(vars_context_index === 9'd6 && vars_A === 32'd11 &&
              vars_B === -32'sd5 && vars_C === 9'sd4 && vars_N === 16'd8)) begin
            $fatal(1, "same-cycle bypass context_model read mismatch");
        end

        all_done_seen = 1'b1;
    end

    // Watchdog: fail if the test has not finished after 3000 rising edges.
    initial begin
        repeat (3000) @(posedge clk);
        $fatal(1, "Timeout waiting for context model smoke");
    end

    // Report PASS four rising edges after the stimulus process completes.
    initial begin
        wait (all_done_seen);
        repeat (4) @(posedge clk);
        $display("PASS: tb_jls_context_model");
        $finish;
    end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
282
fpga/sim/tb_jls_context_quantizer.sv
Normal file
282
fpga/sim/tb_jls_context_quantizer.sv
Normal file
@@ -0,0 +1,282 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.3 context determination, Annex G.1 variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Quantize D1/D2/D3 into Q1/Q2/Q3 and compute context ID
|
||||
// Example : Checks zero, positive, negative, and NEAR-zero gradients.
|
||||
//
|
||||
// Smoke test for jls_context_quantizer.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_context_quantizer;

    // Sample precision used by this test.
    localparam int PIX_WIDTH = 16;

    // Number of context events the scoreboard expects.
    localparam int EXPECTED_EVENT_COUNT = 4;

    // Clock and reset.
    logic clk, rst;

    // Predicted-sample input event.
    logic                 predict_valid, predict_ready;
    logic [PIX_WIDTH-1:0] predict_sample;
    logic [12:0]          predict_x, predict_y;
    logic                 predict_strip_first_pixel;
    logic                 predict_strip_last_pixel;
    logic [PIX_WIDTH-1:0] Ra, Rb, Rc, Rd;
    logic [PIX_WIDTH-1:0] Px;
    logic [15:0]          T1, T2, T3;
    logic [5:0]           NEAR;

    // Quantized context output event.
    logic                 context_valid, context_ready;
    logic [PIX_WIDTH-1:0] context_sample;
    logic [12:0]          context_x, context_y;
    logic                 context_strip_first_pixel;
    logic                 context_strip_last_pixel;
    logic [PIX_WIDTH-1:0] context_Px;
    logic [PIX_WIDTH-1:0] context_Ra, context_Rb, context_Rc, context_Rd;
    logic signed [3:0]    Q1, Q2, Q3;
    logic [8:0]           context_index;
    logic                 context_negative;
    logic                 run_mode_context;

    // Scoreboard: expected values per event, and the number received so far.
    logic signed [3:0] expected_q1_mem       [0:EXPECTED_EVENT_COUNT-1];
    logic signed [3:0] expected_q2_mem       [0:EXPECTED_EVENT_COUNT-1];
    logic signed [3:0] expected_q3_mem       [0:EXPECTED_EVENT_COUNT-1];
    logic [8:0]        expected_index_mem    [0:EXPECTED_EVENT_COUNT-1];
    logic              expected_negative_mem [0:EXPECTED_EVENT_COUNT-1];
    logic              expected_run_mem      [0:EXPECTED_EVENT_COUNT-1];
    int                receive_index;
    logic              all_done_seen;

    jls_context_quantizer #(.PIX_WIDTH(PIX_WIDTH)) dut (
        .clk(clk), .rst(rst),
        .predict_valid(predict_valid), .predict_ready(predict_ready),
        .predict_sample(predict_sample),
        .predict_x(predict_x), .predict_y(predict_y),
        .predict_strip_first_pixel(predict_strip_first_pixel),
        .predict_strip_last_pixel(predict_strip_last_pixel),
        .Ra(Ra), .Rb(Rb), .Rc(Rc), .Rd(Rd),
        .Px(Px),
        .T1(T1), .T2(T2), .T3(T3),
        .NEAR(NEAR),
        .context_valid(context_valid), .context_ready(context_ready),
        .context_sample(context_sample),
        .context_x(context_x), .context_y(context_y),
        .context_strip_first_pixel(context_strip_first_pixel),
        .context_strip_last_pixel(context_strip_last_pixel),
        .context_Px(context_Px),
        .context_Ra(context_Ra), .context_Rb(context_Rb),
        .context_Rc(context_Rc), .context_Rd(context_Rd),
        .Q1(Q1), .Q2(Q2), .Q3(Q3),
        .context_index(context_index),
        .context_negative(context_negative),
        .run_mode_context(run_mode_context)
    );

    // Free-running clock, toggling every 2 time units.
    always #2 clk = ~clk;

    // Expected results, indexed by event number (element 0 first).
    //   event 0: all-zero gradients  -> Q=(0,0,0),    index 0,   run mode
    //   event 1: positive gradients  -> Q=(4,3,1),    index 352
    //   event 2: negative gradients  -> Q=(-4,-3,-2), index 353, negative
    //   event 3: NEAR=2 thresholds   -> Q=(0,1,-1),   index 8
    initial begin
        expected_q1_mem       = '{4'sd0, 4'sd4, -4'sd4, 4'sd0};
        expected_q2_mem       = '{4'sd0, 4'sd3, -4'sd3, 4'sd1};
        expected_q3_mem       = '{4'sd0, 4'sd1, -4'sd2, -4'sd1};
        expected_index_mem    = '{9'd0, 9'd352, 9'd353, 9'd8};
        expected_negative_mem = '{1'b0, 1'b0, 1'b1, 1'b0};
        expected_run_mem      = '{1'b1, 1'b0, 1'b0, 1'b0};
    end

    // Sends one prediction event: waits for predict_ready, drives every field
    // after the next rising edge, holds predict_valid until the following
    // rising edge, then drops predict_valid and the last-pixel flag.
    // Thresholds and NEAR are driven with every event; passing the values
    // they already hold leaves them unchanged.
    task automatic send_prediction(
        input logic [PIX_WIDTH-1:0] sample_value,
        input logic [12:0]          x_value,
        input logic                 last_pixel_value,
        input logic [PIX_WIDTH-1:0] ra_value,
        input logic [PIX_WIDTH-1:0] rb_value,
        input logic [PIX_WIDTH-1:0] rc_value,
        input logic [PIX_WIDTH-1:0] rd_value,
        input logic [PIX_WIDTH-1:0] px_value,
        input logic [15:0]          t1_value,
        input logic [15:0]          t2_value,
        input logic [15:0]          t3_value,
        input logic [5:0]           near_value
    );
        wait (predict_ready);
        @(posedge clk);
        predict_valid            = 1'b1;
        predict_sample           = sample_value;
        predict_x                = x_value;
        predict_strip_last_pixel = last_pixel_value;
        Ra                       = ra_value;
        Rb                       = rb_value;
        Rc                       = rc_value;
        Rd                       = rd_value;
        Px                       = px_value;
        T1                       = t1_value;
        T2                       = t2_value;
        T3                       = t3_value;
        NEAR                     = near_value;
        @(posedge clk);
        predict_valid            = 1'b0;
        predict_strip_last_pixel = 1'b0;
    endtask

    initial begin
        // Idle defaults while reset is asserted; thresholds start at 3/7/21
        // with NEAR=0.
        clk                       = 1'b0;
        rst                       = 1'b1;
        predict_valid             = 1'b0;
        predict_sample            = 16'd0;
        predict_x                 = 13'd0;
        predict_y                 = 13'd0;
        predict_strip_first_pixel = 1'b0;
        predict_strip_last_pixel  = 1'b0;
        Ra                        = 16'd0;
        Rb                        = 16'd0;
        Rc                        = 16'd0;
        Rd                        = 16'd0;
        Px                        = 16'd0;
        T1                        = 16'd3;
        T2                        = 16'd7;
        T3                        = 16'd21;
        NEAR                      = 6'd0;
        context_ready             = 1'b1;
        all_done_seen             = 1'b0;

        repeat (5) @(posedge clk);
        rst = 1'b0;

        // Event 0: Ra=Rb=Rc=Rd, so every gradient is zero.
        send_prediction(16'd100, 13'd0, 1'b0,
                        16'd10, 16'd10, 16'd10, 16'd10, 16'd10,
                        16'd3, 16'd7, 16'd21, 6'd0);

        // Event 1: Rd > Rb > Rc > Ra.
        send_prediction(16'd101, 13'd1, 1'b0,
                        16'd0, 16'd10, 16'd2, 16'd32, 16'd15,
                        16'd3, 16'd7, 16'd21, 6'd0);

        // Event 2: Rd < Rb < Rc < Ra.
        send_prediction(16'd102, 13'd2, 1'b0,
                        16'd34, 16'd22, 16'd30, 16'd0, 16'd20,
                        16'd3, 16'd7, 16'd21, 6'd0);

        // Event 3: last pixel of the strip, with thresholds 7/10/25 and
        // NEAR=2 applied together with the event.
        send_prediction(16'd103, 13'd3, 1'b1,
                        16'd10, 16'd10, 16'd7, 16'd12, 16'd9,
                        16'd7, 16'd10, 16'd25, 6'd2);

        wait (receive_index == EXPECTED_EVENT_COUNT);
        all_done_seen = 1'b1;
    end

    // Scoreboard: checks every accepted context event against the expected
    // tables. All comparisons are 4-state exact, so X/Z counts as a mismatch.
    always_ff @(posedge clk) begin
        if (rst) begin
            receive_index <= 0;
        end else if (context_valid && context_ready) begin
            if (receive_index >= EXPECTED_EVENT_COUNT)
                $fatal(1, "Unexpected extra context event");

            if (!(Q1 === expected_q1_mem[receive_index] &&
                  Q2 === expected_q2_mem[receive_index] &&
                  Q3 === expected_q3_mem[receive_index]))
                $fatal(1, "Q mismatch at %0d: got %0d,%0d,%0d",
                       receive_index, Q1, Q2, Q3);

            if (context_index !== expected_index_mem[receive_index])
                $fatal(1, "context_index mismatch at %0d: got %0d expected %0d",
                       receive_index, context_index, expected_index_mem[receive_index]);

            if (context_negative !== expected_negative_mem[receive_index])
                $fatal(1, "context_negative mismatch at %0d", receive_index);

            if (run_mode_context !== expected_run_mem[receive_index])
                $fatal(1, "run_mode_context mismatch at %0d", receive_index);

            // Samples are sent as 100, 101, 102, 103 in event order.
            if (context_sample !== (16'd100 + receive_index[15:0]))
                $fatal(1, "context_sample mismatch at %0d", receive_index);

            receive_index <= receive_index + 1;
        end
    end

    // Watchdog: fail if the test has not finished after 2000 rising edges.
    initial begin
        repeat (2000) @(posedge clk);
        $fatal(1, "Timeout waiting for context quantizer smoke");
    end

    // Report PASS four rising edges after all expected events have arrived.
    initial begin
        wait (all_done_seen);
        repeat (4) @(posedge clk);
        $display("PASS: tb_jls_context_quantizer");
        $finish;
    end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
233
fpga/sim/tb_jls_context_update.sv
Normal file
233
fpga/sim/tb_jls_context_update.sv
Normal file
@@ -0,0 +1,233 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 Golomb parameter, Annex A.6 variables update
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Compute k and update regular-mode A/B/C/N
|
||||
// Example : Checks positive, negative, RESET, and C saturation updates.
|
||||
//
|
||||
// Smoke test for jls_context_update.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_context_update;

  // Number of result events the scoreboard expects, one per stimulus vector.
  localparam int EXPECTED_EVENT_COUNT = 4;

  // Main simulation clock and reset.
  logic clk;
  logic rst;

  // Input update event (valid/ready handshake plus context operands).
  logic               update_valid;
  logic               update_ready;
  logic        [31:0] A_in;
  logic signed [31:0] B_in;
  logic signed [8:0]  C_in;
  logic        [15:0] N_in;
  logic signed [31:0] Errval;
  logic        [8:0]  context_index_in;
  logic               strip_last_pixel_in;
  logic        [4:0]  qbpp_in;
  logic        [6:0]  LIMIT_in;
  logic        [5:0]  NEAR;
  logic        [15:0] RESET;

  // Output update event.
  logic               result_valid;
  logic               result_ready;
  logic        [4:0]  k;
  logic signed [31:0] Errval_out;
  logic        [8:0]  context_index_out;
  logic               strip_last_pixel_out;
  logic        [4:0]  qbpp_out;
  logic        [6:0]  LIMIT_out;
  logic               map_invert;
  logic        [31:0] A_out;
  logic signed [31:0] B_out;
  logic signed [8:0]  C_out;
  logic        [15:0] N_out;

  // Scoreboard state. Only k and the updated A/B/C/N values are compared;
  // the remaining DUT outputs are connected but not checked by this smoke test.
  logic        [4:0]  expected_k_mem [0:EXPECTED_EVENT_COUNT-1];
  logic        [31:0] expected_a_mem [0:EXPECTED_EVENT_COUNT-1];
  logic signed [31:0] expected_b_mem [0:EXPECTED_EVENT_COUNT-1];
  logic signed [8:0]  expected_c_mem [0:EXPECTED_EVENT_COUNT-1];
  logic        [15:0] expected_n_mem [0:EXPECTED_EVENT_COUNT-1];
  int                 receive_index;
  logic               all_done_seen;

  jls_context_update dut (
    .clk                 (clk),
    .rst                 (rst),
    .update_valid        (update_valid),
    .update_ready        (update_ready),
    .A_in                (A_in),
    .B_in                (B_in),
    .C_in                (C_in),
    .N_in                (N_in),
    .Errval              (Errval),
    .context_index_in    (context_index_in),
    .strip_last_pixel_in (strip_last_pixel_in),
    .qbpp_in             (qbpp_in),
    .LIMIT_in            (LIMIT_in),
    .NEAR                (NEAR),
    .RESET               (RESET),
    .result_valid        (result_valid),
    .result_ready        (result_ready),
    .k                   (k),
    .Errval_out          (Errval_out),
    .context_index_out   (context_index_out),
    .strip_last_pixel_out(strip_last_pixel_out),
    .qbpp_out            (qbpp_out),
    .LIMIT_out           (LIMIT_out),
    .map_invert          (map_invert),
    .A_out               (A_out),
    .B_out               (B_out),
    .C_out               (C_out),
    .N_out               (N_out)
  );

  // Free-running clock with a period of 4 time units.
  always begin
    #2 clk = ~clk;
  end

  // Expected results, index i corresponds to the i-th vector sent below.
  //   0: positive Errval          -> C incremented, B clamped to 0
  //   1: negative Errval, NEAR=1  -> C decremented, B clamped to -N+1
  //   2: N equal to RESET         -> A/B/N halved before N is incremented
  //   3: C already at 127         -> C stays saturated
  initial begin
    expected_k_mem = '{5'd2,    5'd2,     5'd1,      5'd0};
    expected_a_mem = '{32'd7,   32'd15,   32'd52,    32'd1};
    expected_b_mem = '{32'sd0,  -32'sd4,  -32'sd21,  32'sd0};
    expected_c_mem = '{9'sd1,   -9'sd1,   9'sd6,     9'sd127};
    expected_n_mem = '{16'd2,   16'd5,    16'd33,    16'd2};
  end

  // Drive one update event into the DUT.
  //
  // All stimulus is applied with nonblocking assignments so the DUT never
  // sees a value change in the same time step in which it samples the clock
  // edge, and update_valid is held until an edge on which update_ready is
  // high so the transfer cannot be lost when the DUT back-pressures.
  task automatic send_update(
      input logic        [31:0] a_value,
      input logic signed [31:0] b_value,
      input logic signed [8:0]  c_value,
      input logic        [15:0] n_value,
      input logic signed [31:0] errval_value,
      input logic        [5:0]  near_value
  );
    @(posedge clk);
    update_valid <= 1'b1;
    A_in         <= a_value;
    B_in         <= b_value;
    C_in         <= c_value;
    N_in         <= n_value;
    Errval       <= errval_value;
    NEAR         <= near_value;

    // Wait for the edge on which the DUT accepts the event.
    do begin
      @(posedge clk);
    end while (update_ready !== 1'b1);

    update_valid <= 1'b0;
  endtask

  // Reset sequence followed by the four stimulus vectors.
  initial begin
    clk                 = 1'b0;
    rst                 = 1'b1;
    update_valid        = 1'b0;
    A_in                = 32'd0;
    B_in                = 32'sd0;
    C_in                = 9'sd0;
    N_in                = 16'd1;
    Errval              = 32'sd0;
    context_index_in    = 9'd0;
    strip_last_pixel_in = 1'b0;
    qbpp_in             = 5'd8;
    LIMIT_in            = 7'd32;
    NEAR                = 6'd0;
    RESET               = 16'd64;
    result_ready        = 1'b1;
    all_done_seen       = 1'b0;

    repeat (5) @(posedge clk);
    rst <= 1'b0;

    //          A        B         C        N       Errval    NEAR
    send_update(32'd4,   32'sd0,   9'sd0,   16'd1,  32'sd3,   6'd0);
    send_update(32'd10,  -32'sd1,  9'sd0,   16'd4,  -32'sd5,  6'd1);
    send_update(32'd100, 32'sd20,  9'sd5,   16'd64, 32'sd4,   6'd0);
    send_update(32'd1,   32'sd10,  9'sd127, 16'd1,  32'sd0,   6'd0);

    wait (receive_index == EXPECTED_EVENT_COUNT);
    all_done_seen = 1'b1;
  end

  // Scoreboard: compare every accepted result against the expected tables.
  always_ff @(posedge clk) begin
    if (rst) begin
      receive_index <= 0;
    end else if (result_valid && result_ready) begin
      if (receive_index >= EXPECTED_EVENT_COUNT) begin
        $fatal(1, "Unexpected extra context update event");
      end

      if (k !== expected_k_mem[receive_index]) begin
        $fatal(1, "k mismatch at %0d: got %0d expected %0d",
               receive_index, k, expected_k_mem[receive_index]);
      end

      if (A_out !== expected_a_mem[receive_index] ||
          B_out !== expected_b_mem[receive_index] ||
          C_out !== expected_c_mem[receive_index] ||
          N_out !== expected_n_mem[receive_index]) begin
        $fatal(1, "context update mismatch at %0d: A=%0d B=%0d C=%0d N=%0d",
               receive_index, A_out, B_out, C_out, N_out);
      end

      receive_index <= receive_index + 1;
    end
  end

  // Watchdog: abort if the test has not finished after 2000 clock cycles.
  initial begin
    repeat (2000) @(posedge clk);
    $fatal(1, "Timeout waiting for context update smoke");
  end

  // Completion: allow a few idle cycles so a spurious extra event would still
  // be caught by the scoreboard, then report success.
  initial begin
    wait (all_done_seen);
    repeat (4) @(posedge clk);
    $display("PASS: tb_jls_context_update");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
195
fpga/sim/tb_jls_error_mapper.sv
Normal file
195
fpga/sim/tb_jls_error_mapper.sv
Normal file
@@ -0,0 +1,195 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 prediction error encoding
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Map signed Errval into non-negative MErrval
|
||||
// Example : Checks positive, negative, and context-inverted values.
|
||||
//
|
||||
// Smoke test for jls_error_mapper.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_error_mapper;

  // Number of mapped-error events the scoreboard expects.
  localparam int EXPECTED_EVENT_COUNT = 4;

  // Main simulation clock and reset.
  logic clk;
  logic rst;

  // Error input interface.
  logic               err_valid;
  logic               err_ready;
  logic signed [31:0] Errval;
  logic               map_invert;
  logic        [4:0]  k;
  logic        [6:0]  limit;
  logic        [4:0]  qbpp;
  logic               strip_last_pixel;

  // Mapped output interface.
  logic        mapped_valid;
  logic        mapped_ready;
  logic [31:0] MErrval;
  logic [4:0]  mapped_k;
  logic [6:0]  mapped_limit;
  logic [4:0]  mapped_qbpp;
  logic        mapped_strip_last_pixel;

  // Scoreboard state. mapped_strip_last_pixel is connected but not compared.
  logic [31:0] expected_merrval_mem [0:EXPECTED_EVENT_COUNT-1];
  logic [4:0]  expected_k_mem       [0:EXPECTED_EVENT_COUNT-1];
  logic [6:0]  expected_limit_mem   [0:EXPECTED_EVENT_COUNT-1];
  logic [4:0]  expected_qbpp_mem    [0:EXPECTED_EVENT_COUNT-1];
  int          receive_index;
  logic        all_done_seen;

  jls_error_mapper dut (
    .clk                    (clk),
    .rst                    (rst),
    .err_valid              (err_valid),
    .err_ready              (err_ready),
    .Errval                 (Errval),
    .map_invert             (map_invert),
    .k                      (k),
    .limit                  (limit),
    .qbpp                   (qbpp),
    .strip_last_pixel       (strip_last_pixel),
    .mapped_valid           (mapped_valid),
    .mapped_ready           (mapped_ready),
    .MErrval                (MErrval),
    .mapped_k               (mapped_k),
    .mapped_limit           (mapped_limit),
    .mapped_qbpp            (mapped_qbpp),
    .mapped_strip_last_pixel(mapped_strip_last_pixel)
  );

  // Free-running clock with a period of 4 time units.
  always begin
    #2 clk = ~clk;
  end

  // Expected results, index i corresponds to the i-th vector sent below.
  //   0: Errval=+3, normal map   -> 2*3       = 6
  //   1: Errval=-3, normal map   -> 2*3 - 1   = 5
  //   2: Errval=+3, inverted map -> 2*3 + 1   = 7
  //   3: Errval=-3, inverted map -> 2*(3 - 1) = 4
  // k, limit and qbpp are expected to be forwarded unchanged.
  initial begin
    expected_merrval_mem = '{32'd6, 32'd5, 32'd7, 32'd4};
    expected_k_mem       = '{5'd1,  5'd2,  5'd3,  5'd4};
    expected_limit_mem   = '{7'd32, 7'd32, 7'd31, 7'd30};
    expected_qbpp_mem    = '{5'd8,  5'd8,  5'd8,  5'd7};
  end

  // Drive one error sample into the DUT.
  //
  // Stimulus uses nonblocking assignments so it cannot race the DUT's
  // sampling of the same clock edge, and err_valid stays asserted until an
  // edge on which err_ready is high so the sample cannot be dropped.
  task automatic send_error(
      input logic signed [31:0] errval_value,
      input logic               invert_value,
      input logic        [4:0]  k_value,
      input logic        [6:0]  limit_value,
      input logic        [4:0]  qbpp_value
  );
    @(posedge clk);
    err_valid  <= 1'b1;
    Errval     <= errval_value;
    map_invert <= invert_value;
    k          <= k_value;
    limit      <= limit_value;
    qbpp       <= qbpp_value;

    // Wait for the edge on which the DUT accepts the sample.
    do begin
      @(posedge clk);
    end while (err_ready !== 1'b1);

    err_valid <= 1'b0;
  endtask

  // Reset sequence followed by the four stimulus vectors.
  initial begin
    clk              = 1'b0;
    rst              = 1'b1;
    err_valid        = 1'b0;
    Errval           = 32'sd0;
    map_invert       = 1'b0;
    k                = 5'd0;
    limit            = 7'd0;
    qbpp             = 5'd0;
    strip_last_pixel = 1'b0;
    mapped_ready     = 1'b1;
    all_done_seen    = 1'b0;

    repeat (5) @(posedge clk);
    rst <= 1'b0;

    //         Errval    invert  k     limit  qbpp
    send_error(32'sd3,   1'b0,   5'd1, 7'd32, 5'd8);
    send_error(-32'sd3,  1'b0,   5'd2, 7'd32, 5'd8);
    send_error(32'sd3,   1'b1,   5'd3, 7'd31, 5'd8);
    send_error(-32'sd3,  1'b1,   5'd4, 7'd30, 5'd7);

    wait (receive_index == EXPECTED_EVENT_COUNT);
    all_done_seen = 1'b1;
  end

  // Scoreboard: compare every accepted output against the expected tables.
  always_ff @(posedge clk) begin
    if (rst) begin
      receive_index <= 0;
    end else if (mapped_valid && mapped_ready) begin
      if (receive_index >= EXPECTED_EVENT_COUNT) begin
        $fatal(1, "Unexpected extra mapped-error event");
      end

      if (MErrval !== expected_merrval_mem[receive_index]) begin
        $fatal(1, "MErrval mismatch at %0d: got %0d expected %0d",
               receive_index, MErrval, expected_merrval_mem[receive_index]);
      end

      if (mapped_k     !== expected_k_mem[receive_index]     ||
          mapped_limit !== expected_limit_mem[receive_index] ||
          mapped_qbpp  !== expected_qbpp_mem[receive_index]) begin
        $fatal(1, "forwarded Golomb parameter mismatch at %0d", receive_index);
      end

      receive_index <= receive_index + 1;
    end
  end

  // Watchdog: abort if the test has not finished after 2000 clock cycles.
  initial begin
    repeat (2000) @(posedge clk);
    $fatal(1, "Timeout waiting for error mapper smoke");
  end

  // Completion: allow a few idle cycles so a spurious extra event would still
  // be caught by the scoreboard, then report success.
  initial begin
    wait (all_done_seen);
    repeat (4) @(posedge clk);
    $display("PASS: tb_jls_error_mapper");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
192
fpga/sim/tb_jls_golomb_encoder.sv
Normal file
192
fpga/sim/tb_jls_golomb_encoder.sv
Normal file
@@ -0,0 +1,192 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 prediction error encoding, Annex G.2 variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Golomb coding of MErrval with k, LIMIT, and qbpp
|
||||
// Example : MErrval=5,k=1 emits prefix bits 0,0,1 then suffix bit 1.
|
||||
//
|
||||
// Smoke test for jls_golomb_encoder. The test watches code events directly:
|
||||
// 1. regular Golomb path with a non-zero k,
|
||||
// 2. regular path with k=0 and no suffix,
|
||||
// 3. LIMIT path with a qbpp-wide suffix.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_golomb_encoder;

  // Code-event width used by the smoke test.
  localparam int MAX_CODE_BITS = 64;

  // Number of mapped-error samples driven into the DUT.
  localparam int EXPECTED_SAMPLE_COUNT = 3;

  // Expected direct code-event count across all three mapped-error samples.
  localparam int EXPECTED_EVENT_COUNT = 5;

  // Main simulation clock and reset.
  logic clk;
  logic rst;

  // Mapped-error input interface.
  logic        mapped_valid;
  logic        mapped_ready;
  logic [31:0] MErrval;
  logic [4:0]  k;
  logic [6:0]  limit;
  logic [4:0]  qbpp;
  logic        mapped_strip_last_pixel;

  // Code-event output interface. mapped_last_done is connected but not checked.
  logic                     code_valid;
  logic                     code_ready;
  logic [MAX_CODE_BITS-1:0] code_bits;
  logic [6:0]               code_bit_count;
  logic                     mapped_done;
  logic                     mapped_last_done;

  // Scoreboard state.
  logic [MAX_CODE_BITS-1:0] expected_bits_mem  [0:EXPECTED_EVENT_COUNT-1];
  logic [6:0]               expected_count_mem [0:EXPECTED_EVENT_COUNT-1];
  int                       receive_index;
  int                       done_count;
  logic                     all_done_seen;

  jls_golomb_encoder #(
    .MAX_CODE_BITS(MAX_CODE_BITS)
  ) dut (
    .clk                    (clk),
    .rst                    (rst),
    .mapped_valid           (mapped_valid),
    .mapped_ready           (mapped_ready),
    .MErrval                (MErrval),
    .k                      (k),
    .limit                  (limit),
    .qbpp                   (qbpp),
    .mapped_strip_last_pixel(mapped_strip_last_pixel),
    .code_valid             (code_valid),
    .code_ready             (code_ready),
    .code_bits              (code_bits),
    .code_bit_count         (code_bit_count),
    .mapped_done            (mapped_done),
    .mapped_last_done       (mapped_last_done)
  );

  // Free-running clock with a period of 4 time units.
  always begin
    #2 clk = ~clk;
  end

  // Expected code events. The expected bit patterns are left-aligned in the
  // 64-bit code word: the first emitted bit sits in bit 63.
  initial begin
    // Regular path, MErrval=5,k=1:
    // high_bits=2 -> prefix 0,0,1; suffix is the low one bit, 1.
    expected_bits_mem[0]  = 64'h2000_0000_0000_0000;
    expected_count_mem[0] = 7'd3;
    expected_bits_mem[1]  = 64'h8000_0000_0000_0000;
    expected_count_mem[1] = 7'd1;

    // Regular path, MErrval=0,k=0: prefix is just the terminating one bit.
    expected_bits_mem[2]  = 64'h8000_0000_0000_0000;
    expected_count_mem[2] = 7'd1;

    // LIMIT path, MErrval=5,limit=10,qbpp=8:
    // escape prefix is limit-qbpp-1 = 1 zero bit plus the terminating one,
    // i.e. limit-qbpp = 2 bits (0,1); the qbpp-bit suffix carries MErrval-1 = 4.
    expected_bits_mem[3]  = 64'h4000_0000_0000_0000;
    expected_count_mem[3] = 7'd2;
    expected_bits_mem[4]  = 64'h0400_0000_0000_0000;
    expected_count_mem[4] = 7'd8;
  end

  // Drive one mapped-error sample into the DUT.
  //
  // Stimulus uses nonblocking assignments so it cannot race the DUT's
  // sampling of the same clock edge, and mapped_valid stays asserted until an
  // edge on which mapped_ready is high so the sample cannot be dropped.
  task automatic send_mapped(
      input logic [31:0] merrval_value,
      input logic [4:0]  k_value,
      input logic [6:0]  limit_value,
      input logic [4:0]  qbpp_value
  );
    @(posedge clk);
    MErrval      <= merrval_value;
    k            <= k_value;
    limit        <= limit_value;
    qbpp         <= qbpp_value;
    mapped_valid <= 1'b1;

    // Wait for the edge on which the DUT accepts the sample.
    do begin
      @(posedge clk);
    end while (mapped_ready !== 1'b1);

    mapped_valid <= 1'b0;
  endtask

  // Reset sequence followed by three samples. Each sample is allowed to
  // complete (mapped_done counted) before the next one is offered.
  initial begin
    clk                     = 1'b0;
    rst                     = 1'b1;
    mapped_valid            = 1'b0;
    MErrval                 = 32'd0;
    k                       = 5'd0;
    limit                   = 7'd32;
    qbpp                    = 5'd8;
    mapped_strip_last_pixel = 1'b0;
    code_ready              = 1'b1;
    all_done_seen           = 1'b0;

    repeat (5) @(posedge clk);
    rst <= 1'b0;

    // 1. Regular Golomb path with a non-zero k.
    send_mapped(32'd5, 5'd1, 7'd32, 5'd8);
    wait (done_count == 1);

    // 2. Regular path with k=0 and no suffix.
    send_mapped(32'd0, 5'd0, 7'd32, 5'd8);
    wait (done_count == 2);

    // 3. LIMIT path with a qbpp-wide suffix.
    send_mapped(32'd5, 5'd0, 7'd10, 5'd8);
    wait (done_count == EXPECTED_SAMPLE_COUNT);

    all_done_seen = 1'b1;
  end

  // Scoreboard: check every accepted code event and count mapped_done pulses.
  always_ff @(posedge clk) begin
    if (rst) begin
      receive_index <= 0;
      done_count    <= 0;
    end else begin
      if (code_valid && code_ready) begin
        if (receive_index >= EXPECTED_EVENT_COUNT) begin
          $fatal(1, "Unexpected extra Golomb code event");
        end

        if (code_bit_count !== expected_count_mem[receive_index]) begin
          $fatal(1, "Golomb code count mismatch at %0d: got %0d expected %0d",
                 receive_index, code_bit_count, expected_count_mem[receive_index]);
        end

        if (code_bits !== expected_bits_mem[receive_index]) begin
          $fatal(1, "Golomb code bits mismatch at %0d: got 0x%016h expected 0x%016h",
                 receive_index, code_bits, expected_bits_mem[receive_index]);
        end

        receive_index <= receive_index + 1;
      end

      if (mapped_done) begin
        done_count <= done_count + 1;
      end
    end
  end

  // Watchdog: abort if the test has not finished after 2000 clock cycles.
  initial begin
    repeat (2000) @(posedge clk);
    $fatal(1, "Timeout waiting for Golomb encoder smoke");
  end

  // Completion: after a few idle cycles, confirm the final event and sample
  // counts before reporting success.
  initial begin
    wait (all_done_seen);
    repeat (4) @(posedge clk);
    if (receive_index !== EXPECTED_EVENT_COUNT ||
        done_count !== EXPECTED_SAMPLE_COUNT) begin
      $fatal(1, "Golomb encoder smoke count mismatch");
    end
    $display("PASS: tb_jls_golomb_encoder");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
215
fpga/sim/tb_jls_header_writer.sv
Normal file
215
fpga/sim/tb_jls_header_writer.sv
Normal file
@@ -0,0 +1,215 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.2.2 frame header, C.2.3 scan header, C.2.4.1 LSE
|
||||
// Figure : N/A
|
||||
// Table : Table C.1 preset parameters, Table C.2 RESET, Table C.3 defaults
|
||||
// Pseudocode : JPEG-LS marker segment emission before and after one scan
|
||||
// Example : 8-bit 32x16 strip with NEAR=0 and default LSE parameters.
|
||||
//
|
||||
// Smoke test for jls_header_writer. The test checks exact marker bytes for a
|
||||
// single standalone strip frame header and the trailing EOI marker.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_header_writer;

  // Test precision. 8-bit defaults use MAXVAL=255, T1=3, T2=7, T3=21.
  localparam int PIX_WIDTH = 8;

  // Expected stream is header bytes plus EOI after a finish command.
  localparam int HEADER_BYTE_COUNT = 40;
  localparam int TOTAL_BYTE_COUNT  = 42;

  // Main simulation clock and reset.
  logic clk;
  logic rst;

  // Header writer command inputs.
  logic        strip_start_valid;
  logic        strip_start_ready;
  logic        original_image_first_strip;
  logic [12:0] strip_width;
  logic [12:0] strip_height;
  logic [5:0]  near;
  logic [15:0] preset_maxval;
  logic [15:0] preset_t1;
  logic [15:0] preset_t2;
  logic [15:0] preset_t3;
  logic [15:0] preset_reset;
  logic        strip_finish_valid;
  logic        strip_finish_ready;

  // Byte output under test. header_done and eoi_done are connected but not
  // checked; the scoreboard derives its own progress flags from the byte count.
  logic       byte_valid;
  logic       byte_ready;
  logic [7:0] byte_data;
  logic       original_image_start;
  logic       header_done;
  logic       eoi_done;

  // Expected byte stream and scoreboard state.
  logic [7:0] expected_mem [0:TOTAL_BYTE_COUNT-1];
  int         byte_count;
  logic       header_done_seen;
  logic       done_seen;

  jls_header_writer #(
    .PIX_WIDTH(PIX_WIDTH)
  ) dut (
    .clk                       (clk),
    .rst                       (rst),
    .strip_start_valid         (strip_start_valid),
    .strip_start_ready         (strip_start_ready),
    .original_image_first_strip(original_image_first_strip),
    .strip_width               (strip_width),
    .strip_height              (strip_height),
    .near                      (near),
    .preset_maxval             (preset_maxval),
    .preset_t1                 (preset_t1),
    .preset_t2                 (preset_t2),
    .preset_t3                 (preset_t3),
    .preset_reset              (preset_reset),
    .strip_finish_valid        (strip_finish_valid),
    .strip_finish_ready        (strip_finish_ready),
    .byte_valid                (byte_valid),
    .byte_ready                (byte_ready),
    .byte_data                 (byte_data),
    .original_image_start      (original_image_start),
    .header_done               (header_done),
    .eoi_done                  (eoi_done)
  );

  // Free-running clock with a period of 4 time units.
  always begin
    #2 clk = ~clk;
  end

  // Expected byte stream, grouped by marker segment. Multi-byte fields are
  // big-endian; values follow from the stimulus below (16 rows, 32 columns,
  // NEAR=0, MAXVAL=255, T1=3, T2=7, T3=21, RESET=64).
  initial begin
    expected_mem = '{
      // SOI
      8'hFF, 8'hD8,
      // SOF55: Lf=11, P=8, Y=16, X=32, Nf=1, C1=1, H1/V1=0x11, Tq1=0
      8'hFF, 8'hF7, 8'h00, 8'h0B, 8'h08, 8'h00, 8'h10, 8'h00, 8'h20,
      8'h01, 8'h01, 8'h11, 8'h00,
      // LSE: Ll=13, ID=1, MAXVAL, T1, T2, T3, RESET
      8'hFF, 8'hF8, 8'h00, 8'h0D, 8'h01, 8'h00, 8'hFF, 8'h00, 8'h03,
      8'h00, 8'h07, 8'h00, 8'h15, 8'h00, 8'h40,
      // SOS: Ls=8, Ns=1, Cs1=1, Tm1=0, NEAR=0, ILV=0, Al/Ah=0
      8'hFF, 8'hDA, 8'h00, 8'h08, 8'h01, 8'h01, 8'h00, 8'h00, 8'h00, 8'h00,
      // EOI
      8'hFF, 8'hD9
    };
  end

  // Reset, then issue one strip-start command and, once the header bytes
  // have all been observed, one strip-finish command.
  //
  // Commands are driven with nonblocking assignments so they cannot race the
  // DUT's sampling of the same clock edge, and each valid is held until an
  // edge on which the matching ready is high so the command cannot be lost.
  initial begin
    clk                        = 1'b0;
    rst                        = 1'b1;
    strip_start_valid          = 1'b0;
    original_image_first_strip = 1'b1;
    strip_width                = 13'd32;
    strip_height               = 13'd16;
    near                       = 6'd0;
    preset_maxval              = 16'd255;
    preset_t1                  = 16'd3;
    preset_t2                  = 16'd7;
    preset_t3                  = 16'd21;
    preset_reset               = 16'd64;
    strip_finish_valid         = 1'b0;
    byte_ready                 = 1'b1;

    repeat (5) @(posedge clk);
    rst <= 1'b0;

    // Strip start: triggers the SOI/SOF55/LSE/SOS header bytes.
    repeat (2) @(posedge clk);
    strip_start_valid <= 1'b1;
    do begin
      @(posedge clk);
    end while (strip_start_ready !== 1'b1);
    strip_start_valid <= 1'b0;

    // Strip finish: triggers the trailing EOI marker.
    wait (header_done_seen);
    repeat (2) @(posedge clk);
    strip_finish_valid <= 1'b1;
    do begin
      @(posedge clk);
    end while (strip_finish_ready !== 1'b1);
    strip_finish_valid <= 1'b0;
  end

  // Scoreboard: compare every accepted byte and the original_image_start
  // flag, and raise the progress flags at the header and stream boundaries.
  always_ff @(posedge clk) begin
    if (rst) begin
      byte_count       <= 0;
      header_done_seen <= 1'b0;
      done_seen        <= 1'b0;
    end else if (byte_valid && byte_ready) begin
      if (byte_count >= TOTAL_BYTE_COUNT) begin
        $fatal(1, "Unexpected extra byte 0x%02h", byte_data);
      end

      if (byte_data !== expected_mem[byte_count]) begin
        $fatal(1, "byte mismatch at %0d: got 0x%02h expected 0x%02h",
               byte_count, byte_data, expected_mem[byte_count]);
      end

      // original_image_start must accompany only the very first byte.
      if (byte_count == 0) begin
        if (original_image_start !== 1'b1) begin
          $fatal(1, "original_image_start should be high on the first SOI byte");
        end
      end else begin
        if (original_image_start !== 1'b0) begin
          $fatal(1, "original_image_start should be low at byte %0d", byte_count);
        end
      end

      if (byte_count == (HEADER_BYTE_COUNT - 1)) begin
        header_done_seen <= 1'b1;
      end

      if (byte_count == (TOTAL_BYTE_COUNT - 1)) begin
        done_seen <= 1'b1;
      end

      byte_count <= byte_count + 1;
    end
  end

  // Watchdog: abort if the test has not finished after 1000 clock cycles.
  initial begin
    repeat (1000) @(posedge clk);
    $fatal(1, "Timeout waiting for header writer output");
  end

  // Completion: after a few idle cycles, confirm the final byte count before
  // reporting success.
  initial begin
    wait (done_seen);
    repeat (4) @(posedge clk);
    if (byte_count !== TOTAL_BYTE_COUNT) begin
      $fatal(1, "byte_count mismatch: got %0d", byte_count);
    end
    $display("PASS: tb_jls_header_writer");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
284
fpga/sim/tb_jls_input_ctrl.sv
Normal file
284
fpga/sim/tb_jls_input_ctrl.sv
Normal file
@@ -0,0 +1,284 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.8 Control procedure, Annex D.1-D.3 scan control
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Source image sample ordering before JPEG-LS encoding
|
||||
// Example : 16x16 input image split into four 4-row strip frames.
|
||||
//
|
||||
// Smoke test for jls_input_ctrl. The test drives a standard synchronous FIFO
|
||||
// model with one-cycle read latency, discards pre-SOF words, then checks the
|
||||
// generated coordinates and strip/image boundary flags.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_input_ctrl;

  // Test precision. The first smoke uses 16-bit samples because it exercises
  // the 18-bit packed FIFO word with one unused spare bit.
  localparam int PIX_WIDTH = 16;

  // Compact test image size. Width and height are kept at the RTL minimum
  // accepted by jls_input_ctrl.
  localparam int PIC_COL = 16;
  localparam int PIC_ROW = 16;

  // Four source rows per standalone strip frame in this smoke test.
  localparam int SCAN_ROWS = 4;

  // Words before SOF verify that the controller waits for image start.
  localparam int PREAMBLE_COUNT = 3;

  // Packed input FIFO word: bit 17 is SOF for 16-bit samples, bit 16 is spare.
  localparam int IFIFO_DATA_WIDTH = ((PIX_WIDTH + 7) / 8) * 9;
  localparam int SOF_BIT_INDEX    = 17;
  localparam int FIFO_WORD_COUNT  = PREAMBLE_COUNT + (PIC_COL * PIC_ROW);

  // Scoreboard constants derived from the image geometry so the expectation
  // model follows PIC_COL / PIC_ROW / SCAN_ROWS instead of repeating literals.
  localparam int          PIXEL_COUNT = PIC_COL * PIC_ROW;
  localparam logic [12:0] LAST_COL    = 13'(PIC_COL - 1);
  localparam logic [3:0]  TEST_RATIO  = 4'd2;

  // Main simulation clock and reset.
  logic clk;
  logic rst;

  // Runtime configuration sampled by the design at SOF.
  logic [12:0] cfg_pic_col;
  logic [12:0] cfg_pic_row;
  logic [3:0]  ratio;

  // Synchronous input FIFO interface.
  logic                        ififo_rclk;
  logic                        ififo_rd;
  logic [IFIFO_DATA_WIDTH-1:0] ififo_rdata;
  logic                        ififo_empty;
  logic                        ififo_alempty;

  // Downstream flow-control inputs.
  logic pixel_ready;
  logic pause_req;

  // Pixel event outputs under test. pixel_sample and image_active are
  // connected but not compared by this smoke test.
  logic                 pixel_valid;
  logic                 pixel_sof;
  logic [PIX_WIDTH-1:0] pixel_sample;
  logic [12:0]          pixel_x;
  logic [12:0]          pixel_y;
  logic                 strip_first_pixel;
  logic                 strip_last_pixel;
  logic                 image_first_pixel;
  logic                 image_last_pixel;
  logic [12:0]          active_pic_col;
  logic [12:0]          active_pic_row;
  logic [3:0]           active_ratio;
  logic                 active_cfg_valid;
  logic                 image_active;

  // FIFO memory and scoreboard state.
  logic [IFIFO_DATA_WIDTH-1:0] fifo_mem [0:FIFO_WORD_COUNT-1];
  int                          fifo_rd_index;
  int                          event_count;
  logic [12:0]                 expected_x;
  logic [12:0]                 expected_y;
  logic                        expected_strip_first;
  logic                        expected_strip_last;
  logic                        expected_image_first;
  logic                        expected_image_last;
  logic                        done_seen;

  jls_input_ctrl #(
    .PIX_WIDTH      (PIX_WIDTH),
    .DEFAULT_PIC_COL(PIC_COL),
    .DEFAULT_PIC_ROW(PIC_ROW),
    .MAX_PIC_COL    (PIC_COL),
    .MAX_PIC_ROW    (PIC_ROW),
    .SCAN_ROWS      (SCAN_ROWS)
  ) dut (
    .clk              (clk),
    .rst              (rst),
    .cfg_pic_col      (cfg_pic_col),
    .cfg_pic_row      (cfg_pic_row),
    .ratio            (ratio),
    .ififo_rclk       (ififo_rclk),
    .ififo_rd         (ififo_rd),
    .ififo_rdata      (ififo_rdata),
    .ififo_empty      (ififo_empty),
    .ififo_alempty    (ififo_alempty),
    .pixel_ready      (pixel_ready),
    .pause_req        (pause_req),
    .pixel_valid      (pixel_valid),
    .pixel_sof        (pixel_sof),
    .pixel_sample     (pixel_sample),
    .pixel_x          (pixel_x),
    .pixel_y          (pixel_y),
    .strip_first_pixel(strip_first_pixel),
    .strip_last_pixel (strip_last_pixel),
    .image_first_pixel(image_first_pixel),
    .image_last_pixel (image_last_pixel),
    .active_pic_col   (active_pic_col),
    .active_pic_row   (active_pic_row),
    .active_ratio     (active_ratio),
    .active_cfg_valid (active_cfg_valid),
    .image_active     (image_active)
  );

  // The strip-boundary expectation below models full strips only.
  initial begin
    if ((PIC_ROW % SCAN_ROWS) != 0) begin
      $fatal(1, "tb_jls_input_ctrl expects PIC_ROW to be a multiple of SCAN_ROWS");
    end
  end

  // Free-running clock with a period of 4 time units.
  always begin
    #2 clk = ~clk;
  end

  // FIFO status flags: empty once every word has been read, almost-empty
  // from the point where at most one word remains.
  always_comb begin
    ififo_empty   = (fifo_rd_index >= FIFO_WORD_COUNT);
    ififo_alempty = (fifo_rd_index >= (FIFO_WORD_COUNT - 1));
  end

  // Expected boundary flags for the pixel the scoreboard is waiting for.
  // A strip starts at column 0 of every SCAN_ROWS-th row and ends at the last
  // column of the last row of that group.
  always_comb begin
    expected_strip_first = (expected_x == 13'd0) &&
                           ((expected_y % SCAN_ROWS) == 0);
  end

  always_comb begin
    expected_strip_last = (expected_x == LAST_COL) &&
                          ((expected_y % SCAN_ROWS) == (SCAN_ROWS - 1));
  end

  always_comb begin
    expected_image_first = (event_count == 0);
  end

  always_comb begin
    expected_image_last = (event_count == (PIXEL_COUNT - 1));
  end

  // Static configuration, FIFO contents and reset release.
  initial begin
    clk         = 1'b0;
    rst         = 1'b1;
    cfg_pic_col = PIC_COL[12:0];
    cfg_pic_row = PIC_ROW[12:0];
    ratio       = TEST_RATIO;
    pixel_ready = 1'b1;
    pause_req   = 1'b0;

    // Clear every word, preamble included, so the preamble carries no SOF.
    for (int word = 0; word < FIFO_WORD_COUNT; word = word + 1) begin
      fifo_mem[word] = {IFIFO_DATA_WIDTH{1'b0}};
    end

    // Image payload: the sample value equals the raster-order pixel index.
    for (int pixel = 0; pixel < PIXEL_COUNT; pixel = pixel + 1) begin
      fifo_mem[PREAMBLE_COUNT + pixel][PIX_WIDTH-1:0] = PIX_WIDTH'(pixel);
    end

    // Mark the first image word as start of frame.
    fifo_mem[PREAMBLE_COUNT][SOF_BIT_INDEX] = 1'b1;

    // Release reset with a nonblocking assignment so the DUT and the
    // scoreboard observe it on the same clock edge regardless of process order.
    repeat (5) @(posedge clk);
    rst <= 1'b0;
  end

  // Synchronous FIFO read model with one-cycle read latency: data for a read
  // request appears on ififo_rdata after the following clock edge.
  always_ff @(posedge clk) begin
    if (rst) begin
      fifo_rd_index <= 0;
      ififo_rdata   <= {IFIFO_DATA_WIDTH{1'b0}};
    end else if (ififo_rd && !ififo_empty) begin
      ififo_rdata   <= fifo_mem[fifo_rd_index];
      fifo_rd_index <= fifo_rd_index + 1;
    end
  end

  // Scoreboard: check coordinates, boundary flags and the latched
  // configuration on every accepted pixel, then advance the raster position.
  always_ff @(posedge clk) begin
    if (rst) begin
      event_count <= 0;
      expected_x  <= 13'd0;
      expected_y  <= 13'd0;
      done_seen   <= 1'b0;
    end else if (pixel_valid && pixel_ready) begin
      if (ififo_rclk !== clk) begin
        $fatal(1, "ififo_rclk is not tied to clk");
      end

      if (pixel_x !== expected_x) begin
        $fatal(1, "pixel_x mismatch: got %0d expected %0d", pixel_x, expected_x);
      end

      if (pixel_y !== expected_y) begin
        $fatal(1, "pixel_y mismatch: got %0d expected %0d", pixel_y, expected_y);
      end

      if (pixel_sof !== expected_image_first) begin
        $fatal(1, "pixel_sof mismatch at event %0d", event_count);
      end

      if (strip_first_pixel !== expected_strip_first) begin
        $fatal(1, "strip_first_pixel mismatch at x=%0d y=%0d", expected_x, expected_y);
      end

      if (strip_last_pixel !== expected_strip_last) begin
        $fatal(1, "strip_last_pixel mismatch at x=%0d y=%0d", expected_x, expected_y);
      end

      if (image_first_pixel !== expected_image_first) begin
        $fatal(1, "image_first_pixel mismatch at event %0d", event_count);
      end

      if (image_last_pixel !== expected_image_last) begin
        $fatal(1, "image_last_pixel mismatch at event %0d", event_count);
      end

      if (active_pic_col !== PIC_COL[12:0]) begin
        $fatal(1, "active_pic_col mismatch: got %0d", active_pic_col);
      end

      if (active_pic_row !== PIC_ROW[12:0]) begin
        $fatal(1, "active_pic_row mismatch: got %0d", active_pic_row);
      end

      if (active_ratio !== TEST_RATIO) begin
        $fatal(1, "active_ratio mismatch: got %0d", active_ratio);
      end

      if (active_cfg_valid !== 1'b1) begin
        $fatal(1, "active_cfg_valid should be high for this test");
      end

      if (expected_image_last) begin
        done_seen <= 1'b1;
      end

      event_count <= event_count + 1;

      // Raster-order advance: wrap to the next row after the last column.
      if (expected_x == LAST_COL) begin
        expected_x <= 13'd0;
        expected_y <= expected_y + 13'd1;
      end else begin
        expected_x <= expected_x + 13'd1;
      end
    end
  end

  // Watchdog: abort if the test has not finished after 2000 clock cycles.
  initial begin
    repeat (2000) @(posedge clk);
    $fatal(1, "Timeout waiting for input controller to emit the full image");
  end

  // Completion: after a few idle cycles, confirm the final pixel count before
  // reporting success.
  initial begin
    wait (done_seen);
    repeat (4) @(posedge clk);
    if (event_count !== PIXEL_COUNT) begin
      $fatal(1, "event_count mismatch: got %0d", event_count);
    end
    $display("PASS: tb_jls_input_ctrl");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
381
fpga/sim/tb_jls_mode_router.sv
Normal file
381
fpga/sim/tb_jls_mode_router.sv
Normal file
@@ -0,0 +1,381 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.3 context determination, Annex A.7 run mode
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : regular/run mode decision and run segment formation
|
||||
// Example : Two matching run pixels ending at EOL create run_length=2 with
|
||||
// no interruption sample.
|
||||
//
|
||||
// Smoke test for jls_mode_router.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_mode_router;

  // Smoke test for jls_mode_router.
  //
  // The stimulus process presents seven pixels (one regular-mode pixel, then
  // three run sequences) and a scoreboard counts and checks the regular, run
  // segment, and run reconstruction handshakes. All three DUT output ready
  // inputs are held high for the whole test.
  //
  // Stimulus is applied on the falling clock edge. Handshake signals are
  // released with nonblocking assignments after the sampling rising edge, so
  // every clocked process always observes the pre-edge value regardless of the
  // simulator's process ordering.

  // Test precision.
  localparam int PIX_WIDTH = 8;

  // Main simulation clock and reset.
  logic clk;
  logic rst;

  // Pixel input.
  logic pixel_valid;
  logic pixel_ready;
  logic [PIX_WIDTH-1:0] pixel_sample;
  logic [12:0] pixel_x;
  logic [12:0] pixel_y;
  logic pixel_strip_first_pixel;
  logic pixel_strip_last_pixel;
  logic [PIX_WIDTH-1:0] Ra;
  logic [PIX_WIDTH-1:0] Rb;
  logic [PIX_WIDTH-1:0] Rc;
  logic [PIX_WIDTH-1:0] Rd;
  logic [12:0] strip_width;
  logic [5:0] NEAR;

  // Regular output.
  logic regular_valid;
  logic regular_ready;
  logic [PIX_WIDTH-1:0] regular_sample;
  logic [12:0] regular_x;
  logic [12:0] regular_y;
  logic regular_strip_first_pixel;
  logic regular_strip_last_pixel;
  logic [PIX_WIDTH-1:0] regular_Ra;
  logic [PIX_WIDTH-1:0] regular_Rb;
  logic [PIX_WIDTH-1:0] regular_Rc;
  logic [PIX_WIDTH-1:0] regular_Rd;

  // Run segment output.
  logic run_segment_valid;
  logic run_segment_ready;
  logic [12:0] run_length;
  logic run_end_of_line;
  logic run_interruption_valid;
  logic [PIX_WIDTH-1:0] run_interruption_sample;
  logic [12:0] run_interruption_x;
  logic [12:0] run_interruption_y;
  logic run_interruption_strip_first_pixel;
  logic run_interruption_strip_last_pixel;
  logic [PIX_WIDTH-1:0] run_Ra;
  logic [PIX_WIDTH-1:0] run_Rb;
  logic run_segment_done;

  // Direct run-pixel reconstruction.
  logic run_recon_valid;
  logic run_recon_ready;
  logic [PIX_WIDTH-1:0] run_recon_sample;
  logic [12:0] run_recon_x;
  logic [12:0] run_recon_y;

  // Scoreboard state.
  int regular_count;
  int run_segment_count;
  int run_recon_count;
  logic all_done_seen;
  logic eol_loaded_before_done;

  jls_mode_router #(
    .PIX_WIDTH(PIX_WIDTH)
  ) dut (
    .clk(clk),
    .rst(rst),
    .pixel_valid(pixel_valid),
    .pixel_ready(pixel_ready),
    .pixel_sample(pixel_sample),
    .pixel_x(pixel_x),
    .pixel_y(pixel_y),
    .pixel_strip_first_pixel(pixel_strip_first_pixel),
    .pixel_strip_last_pixel(pixel_strip_last_pixel),
    .Ra(Ra),
    .Rb(Rb),
    .Rc(Rc),
    .Rd(Rd),
    .strip_width(strip_width),
    .NEAR(NEAR),
    .regular_valid(regular_valid),
    .regular_ready(regular_ready),
    .regular_sample(regular_sample),
    .regular_x(regular_x),
    .regular_y(regular_y),
    .regular_strip_first_pixel(regular_strip_first_pixel),
    .regular_strip_last_pixel(regular_strip_last_pixel),
    .regular_Ra(regular_Ra),
    .regular_Rb(regular_Rb),
    .regular_Rc(regular_Rc),
    .regular_Rd(regular_Rd),
    .run_segment_valid(run_segment_valid),
    .run_segment_ready(run_segment_ready),
    .run_length(run_length),
    .run_end_of_line(run_end_of_line),
    .run_interruption_valid(run_interruption_valid),
    .run_interruption_sample(run_interruption_sample),
    .run_interruption_x(run_interruption_x),
    .run_interruption_y(run_interruption_y),
    .run_interruption_strip_first_pixel(run_interruption_strip_first_pixel),
    .run_interruption_strip_last_pixel(run_interruption_strip_last_pixel),
    .run_Ra(run_Ra),
    .run_Rb(run_Rb),
    .run_segment_done(run_segment_done),
    .run_recon_valid(run_recon_valid),
    .run_recon_ready(run_recon_ready),
    .run_recon_sample(run_recon_sample),
    .run_recon_x(run_recon_x),
    .run_recon_y(run_recon_y)
  );

  // Free-running clock, toggling every 2 time units.
  always begin
    #2 clk = ~clk;
  end

  // Present one pixel and its neighborhood on the next falling clock edge.
  // pixel_valid stays asserted until the caller releases it.
  task automatic present_pixel(
    input logic [PIX_WIDTH-1:0] sample_value,
    input logic [12:0]          x_value,
    input logic [12:0]          y_value,
    input logic                 strip_first,
    input logic                 strip_last,
    input logic [PIX_WIDTH-1:0] ra_value,
    input logic [PIX_WIDTH-1:0] rb_value,
    input logic [PIX_WIDTH-1:0] rc_value,
    input logic [PIX_WIDTH-1:0] rd_value
  );
    @(negedge clk);
    pixel_valid             = 1'b1;
    pixel_sample            = sample_value;
    pixel_x                 = x_value;
    pixel_y                 = y_value;
    pixel_strip_first_pixel = strip_first;
    pixel_strip_last_pixel  = strip_last;
    Ra                      = ra_value;
    Rb                      = rb_value;
    Rc                      = rc_value;
    Rd                      = rd_value;
  endtask

  // Drop the pixel handshake after a rising edge. Nonblocking so the DUT's
  // clocked logic still samples pixel_valid high on that edge.
  task automatic release_pixel();
    pixel_valid             <= 1'b0;
    pixel_strip_first_pixel <= 1'b0;
    pixel_strip_last_pixel  <= 1'b0;
  endtask

  // Wait until the DUT reports ready, let one rising edge sample the pixel,
  // then release the pixel interface.
  task automatic retire_pixel();
    wait (pixel_ready);
    @(posedge clk);
    release_pixel();
  endtask

  // Report completion of the outstanding run segment with a pulse that covers
  // exactly one rising clock edge.
  task automatic pulse_segment_done();
    @(negedge clk);
    run_segment_done = 1'b1;
    @(posedge clk);
    run_segment_done <= 1'b0;
  endtask

  // Stimulus.
  initial begin
    clk = 1'b0;
    rst = 1'b1;
    pixel_valid = 1'b0;
    pixel_sample = 8'd0;
    pixel_x = 13'd0;
    pixel_y = 13'd0;
    pixel_strip_first_pixel = 1'b0;
    pixel_strip_last_pixel = 1'b0;
    Ra = 8'd0;
    Rb = 8'd0;
    Rc = 8'd0;
    Rd = 8'd0;
    strip_width = 13'd4;
    NEAR = 6'd0;
    regular_ready = 1'b1;
    run_segment_ready = 1'b1;
    run_segment_done = 1'b0;
    run_recon_ready = 1'b1;
    all_done_seen = 1'b0;
    eol_loaded_before_done = 1'b0;

    // Reset release is not timing critical: the first stimulus only appears on
    // the following falling edge.
    repeat (5) @(posedge clk);
    rst = 1'b0;

    // Non-run gradients go to the regular path.
    present_pixel(8'd42, 13'd0, 13'd0, 1'b1, 1'b0, 8'd1, 8'd2, 8'd3, 8'd20);
    retire_pixel();
    wait (regular_count == 1);

    // First run pixel, not EOL: immediate reconstructed Ra, no segment yet.
    present_pixel(8'd5, 13'd0, 13'd1, 1'b1, 1'b0, 8'd5, 8'd5, 8'd5, 8'd5);
    retire_pixel();
    wait (run_recon_count == 1);

    // Interruption pixel ends the run with run_length=1.
    present_pixel(8'd7, 13'd1, 13'd1, 1'b0, 1'b0, 8'd5, 8'd5, 8'd5, 8'd5);
    retire_pixel();
    wait (run_segment_count == 1);

    // A later non-EOL run pixel has no immediate entropy output, so it can be
    // accepted while the previous run segment is still waiting for done.
    present_pixel(8'd9, 13'd0, 13'd2, 1'b1, 1'b0, 8'd9, 8'd9, 8'd9, 8'd9);
    retire_pixel();
    wait (run_recon_count == 2);

    // The EOL pixel would emit the next run segment. The router may either
    // backpressure it or cache it in the input stage, but it must not emit the
    // second segment until the previous segment is reported done.
    present_pixel(8'd9, 13'd3, 13'd2, 1'b0, 1'b1, 8'd9, 8'd9, 8'd9, 8'd9);
    #1;
    if (pixel_ready === 1'b1) begin
      // The DUT accepts the EOL pixel early; let one edge sample it.
      eol_loaded_before_done = 1'b1;
      @(posedge clk);
      release_pixel();
      if (run_segment_count !== 1) begin
        $fatal(1, "EOL run segment emitted before previous segment_done");
      end
    end

    pulse_segment_done();

    // Backpressure case: the EOL pixel is still pending and is only accepted
    // once the DUT raises pixel_ready.
    if (!eol_loaded_before_done) begin
      retire_pixel();
    end
    wait (run_segment_count == 2 && run_recon_count == 3);

    pulse_segment_done();

    // Once a run has started, a later nonmatching sample is a run-interruption
    // sample even if its gradients would no longer enter run mode from idle.
    present_pixel(8'd12, 13'd0, 13'd3, 1'b1, 1'b0, 8'd12, 8'd12, 8'd12, 8'd12);
    retire_pixel();
    wait (run_recon_count == 4);

    present_pixel(8'd20, 13'd1, 13'd3, 1'b0, 1'b0, 8'd12, 8'd0, 8'd3, 8'd40);
    retire_pixel();
    wait (run_segment_count == 3);

    pulse_segment_done();

    all_done_seen = 1'b1;
  end

  // Scoreboard: count each accepted output event and check its fields.
  always_ff @(posedge clk) begin
    if (rst) begin
      regular_count <= 0;
      run_segment_count <= 0;
      run_recon_count <= 0;
    end else begin
      if (regular_valid && regular_ready) begin
        regular_count <= regular_count + 1;

        // Only the first stimulus pixel is expected on the regular path.
        if (regular_sample !== 8'd42 || regular_x !== 13'd0 || regular_Rd !== 8'd20) begin
          $fatal(1, "regular event fields mismatch");
        end
      end

      if (run_recon_valid && run_recon_ready) begin
        run_recon_count <= run_recon_count + 1;

        // Compared against the Ra value the testbench is driving at this edge.
        if (run_recon_sample !== Ra) begin
          $fatal(1, "run recon sample should equal Ra");
        end
      end

      if (run_segment_valid && run_segment_ready) begin
        run_segment_count <= run_segment_count + 1;

        // run_segment_count still holds the index of the segment being
        // accepted because the increment above is nonblocking.
        case (run_segment_count)
          0: begin
            if (run_length !== 13'd1 || run_end_of_line !== 1'b0 ||
                run_interruption_valid !== 1'b1 || run_interruption_sample !== 8'd7 ||
                run_interruption_x !== 13'd1) begin
              $fatal(1, "interruption run segment mismatch");
            end
          end

          1: begin
            if (run_length !== 13'd2 || run_end_of_line !== 1'b1 ||
                run_interruption_valid !== 1'b0) begin
              $fatal(1, "EOL run segment mismatch");
            end
          end

          2: begin
            if (run_length !== 13'd1 || run_end_of_line !== 1'b0 ||
                run_interruption_valid !== 1'b1 || run_interruption_sample !== 8'd20 ||
                run_interruption_x !== 13'd1) begin
              $fatal(1, "post-run interruption segment mismatch");
            end
          end

          default: begin
            $fatal(1, "Unexpected extra run segment");
          end
        endcase
      end
    end
  end

  // Watchdog: abort if the stimulus never completes.
  initial begin
    repeat (5000) @(posedge clk);
    $fatal(1, "Timeout waiting for mode-router smoke");
  end

  // Final check: after a short drain, the event totals must match exactly.
  initial begin
    wait (all_done_seen);
    repeat (4) @(posedge clk);
    if (regular_count !== 1 || run_segment_count !== 3 || run_recon_count !== 4) begin
      $fatal(1, "mode-router count mismatch");
    end
    $display("PASS: tb_jls_mode_router");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
162
fpga/sim/tb_jls_near_ctrl.sv
Normal file
162
fpga/sim/tb_jls_near_ctrl.sv
Normal file
@@ -0,0 +1,162 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.2.3 scan header NEAR parameter; Annex A uses NEAR in coding
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Project dynamic NEAR control around the standard NEAR parameter
|
||||
// Example : ratio=2 means target bits are source bits divided by 4.
|
||||
//
|
||||
// Smoke test for jls_near_ctrl.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_near_ctrl;

  // Smoke test for jls_near_ctrl.
  //
  // Drives image-start and strip-done pulses into the controller and checks
  // current_near, the cumulative actual/target bit counters, and the
  // target-miss flag after each step.
  //
  // NOTE(review): stimulus is changed with blocking assignments right after a
  // rising clock edge, so the edge on which the DUT observes each pulse may
  // depend on simulator process ordering. Timing is kept as-is here; confirm
  // against the DUT's update latency before retiming.

  // Test precision keeps strip source bits easy to inspect.
  localparam int PIX_WIDTH = 8;

  // Main simulation clock and reset.
  logic clk;
  logic rst;

  // Dynamic NEAR controller inputs.
  logic image_start_valid;
  logic [3:0] image_ratio;
  logic strip_done_valid;
  logic [31:0] strip_pixel_count;
  logic [31:0] strip_output_bytes;

  // Dynamic NEAR controller outputs.
  logic [5:0] current_near;
  logic [47:0] actual_bits_cumulative;
  logic [47:0] target_bits_cumulative;
  logic target_miss_at_max_near;
  logic update_busy;

  // Test loop counter for MAX_NEAR saturation.
  int loop_index;

  jls_near_ctrl #(
    .PIX_WIDTH(PIX_WIDTH),
    .MAX_NEAR(31)
  ) dut (
    .clk(clk),
    .rst(rst),
    .image_start_valid(image_start_valid),
    .image_ratio(image_ratio),
    .strip_done_valid(strip_done_valid),
    .strip_pixel_count(strip_pixel_count),
    .strip_output_bytes(strip_output_bytes),
    .current_near(current_near),
    .actual_bits_cumulative(actual_bits_cumulative),
    .target_bits_cumulative(target_bits_cumulative),
    .target_miss_at_max_near(target_miss_at_max_near),
    .update_busy(update_busy)
  );

  // Free-running clock, toggling every 2 time units.
  always begin
    #2 clk = ~clk;
  end

  // Start a new image: raise image_start_valid with the given ratio code for
  // one clock period.
  task automatic begin_image(input logic [3:0] ratio_code);
    @(posedge clk);
    image_ratio = ratio_code;
    image_start_valid = 1'b1;
    @(posedge clk);
    image_start_valid = 1'b0;
  endtask

  // Report one finished strip: raise strip_done_valid with the given output
  // byte count for one clock period, then wait one further rising edge.
  task automatic complete_strip(input logic [31:0] byte_count);
    @(posedge clk);
    strip_output_bytes = byte_count;
    strip_done_valid = 1'b1;
    @(posedge clk);
    strip_done_valid = 1'b0;
    @(posedge clk);
  endtask

  initial begin
    clk = 1'b0;
    rst = 1'b1;
    image_start_valid = 1'b0;
    image_ratio = 4'd0;
    strip_done_valid = 1'b0;
    strip_pixel_count = 32'd256;
    strip_output_bytes = 32'd0;

    repeat (5) @(posedge clk);
    rst = 1'b0;

    // ratio=2 targets 1/4 of source bits. A 256-pixel 8-bit strip has
    // 2048 source bits and 512 target bits.
    begin_image(4'd2);

    // 100 bytes = 800 bits against a 512-bit target: NEAR is expected to rise.
    complete_strip(32'd100);
    #1;
    if (!(current_near === 6'd1 && actual_bits_cumulative === 48'd800 &&
          target_bits_cumulative === 48'd512)) begin
      $fatal(1, "ratio=2 first strip update mismatch");
    end

    // Cumulative 1120 bits against 1024: still over target.
    complete_strip(32'd40);
    #1;
    if (!(current_near === 6'd2 && actual_bits_cumulative === 48'd1120 &&
          target_bits_cumulative === 48'd1024)) begin
      $fatal(1, "ratio=2 second strip update mismatch");
    end

    // Cumulative 1200 bits against 1536: under target, NEAR steps back down.
    complete_strip(32'd10);
    #1;
    if (!(current_near === 6'd1 && actual_bits_cumulative === 48'd1200 &&
          target_bits_cumulative === 48'd1536)) begin
      $fatal(1, "ratio=2 third strip update mismatch");
    end

    // ratio=0 is lossless mode and must force NEAR back to 0.
    begin_image(4'd0);
    complete_strip(32'd100);
    #1;
    if (!(current_near === 6'd0 && target_miss_at_max_near === 1'b0)) begin
      $fatal(1, "lossless ratio should force NEAR to 0 without target miss");
    end

    // Repeated over-target strips saturate at MAX_NEAR and then set the sticky
    // miss flag on the next over-target completion.
    begin_image(4'd1);
    for (loop_index = 0; loop_index < 32; loop_index = loop_index + 1) begin
      complete_strip(32'd1000);
    end

    #1;
    if (!(current_near === 6'd31 && target_miss_at_max_near === 1'b1)) begin
      $fatal(1, "MAX_NEAR saturation or target miss flag mismatch");
    end

    $display("PASS: tb_jls_near_ctrl");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
242
fpga/sim/tb_jls_neighbor_provider.sv
Normal file
242
fpga/sim/tb_jls_neighbor_provider.sv
Normal file
@@ -0,0 +1,242 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.3 context determination, Annex A.4 prediction
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Reconstructed neighborhood selection for Ra/Rb/Rc/Rd
|
||||
// Example : Checks top-row zeros, previous-line neighbors, and x=0 Rc
|
||||
// left-edge extension on the third row.
|
||||
//
|
||||
// Smoke test for jls_neighbor_provider.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_neighbor_provider;

  // Smoke test for jls_neighbor_provider.
  //
  // Ten pixels (three rows of three, plus the first pixel of a new strip) are
  // fed one at a time. After each neighborhood event the matching
  // reconstructed sample is written back. A scoreboard compares the forwarded
  // metadata and the Ra/Rb/Rc/Rd neighborhood against precomputed tables.
  //
  // Stimulus is applied on the falling clock edge. Handshake signals are
  // released with nonblocking assignments after the sampling rising edge, so
  // every clocked process always observes the pre-edge value regardless of the
  // simulator's process ordering.

  // Small smoke-test precision and line width.
  localparam int PIX_WIDTH = 8;
  localparam int MAX_PIC_COL = 8;
  localparam int EVENT_COUNT = 10;

  // Main simulation clock and reset.
  logic clk;
  logic rst;

  // Source pixel interface.
  logic pixel_valid;
  logic pixel_ready;
  logic [PIX_WIDTH-1:0] pixel_sample;
  logic [12:0] pixel_x;
  logic [12:0] pixel_y;
  logic pixel_row_last;
  logic strip_first_pixel;
  logic strip_last_pixel;
  logic [12:0] strip_width;
  logic [5:0] NEAR;

  // Neighborhood event interface.
  logic neigh_valid;
  logic neigh_ready;
  logic [PIX_WIDTH-1:0] neigh_sample;
  logic [12:0] neigh_x;
  logic [12:0] neigh_y;
  logic neigh_strip_first_pixel;
  logic neigh_strip_last_pixel;
  logic [PIX_WIDTH-1:0] Ra;
  logic [PIX_WIDTH-1:0] Rb;
  logic [PIX_WIDTH-1:0] Rc;
  logic [PIX_WIDTH-1:0] Rd;

  // Reconstructed writeback interface.
  logic recon_valid;
  logic recon_ready;
  logic [PIX_WIDTH-1:0] recon_sample;
  logic [12:0] recon_x;
  logic [12:0] recon_y;

  // Test vectors and scoreboard.
  logic [PIX_WIDTH-1:0] sample_mem [0:EVENT_COUNT-1];
  logic [12:0] x_mem [0:EVENT_COUNT-1];
  logic [12:0] y_mem [0:EVENT_COUNT-1];
  logic first_mem [0:EVENT_COUNT-1];
  logic last_mem [0:EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0] recon_mem [0:EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0] exp_Ra_mem [0:EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0] exp_Rb_mem [0:EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0] exp_Rc_mem [0:EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0] exp_Rd_mem [0:EVENT_COUNT-1];
  int drive_index;
  int receive_index;
  logic all_done_seen;

  jls_neighbor_provider #(
    .PIX_WIDTH(PIX_WIDTH),
    .MAX_PIC_COL(MAX_PIC_COL)
  ) dut (
    .clk(clk),
    .rst(rst),
    .pixel_valid(pixel_valid),
    .pixel_ready(pixel_ready),
    .pixel_sample(pixel_sample),
    .pixel_x(pixel_x),
    .pixel_y(pixel_y),
    .pixel_row_last(pixel_row_last),
    .strip_first_pixel(strip_first_pixel),
    .strip_last_pixel(strip_last_pixel),
    .NEAR(NEAR),
    .neigh_valid(neigh_valid),
    .neigh_ready(neigh_ready),
    .neigh_sample(neigh_sample),
    .neigh_x(neigh_x),
    .neigh_y(neigh_y),
    .neigh_strip_first_pixel(neigh_strip_first_pixel),
    .neigh_strip_last_pixel(neigh_strip_last_pixel),
    .Ra(Ra),
    .Rb(Rb),
    .Rc(Rc),
    .Rd(Rd),
    .recon_valid(recon_valid),
    .recon_ready(recon_ready),
    .recon_sample(recon_sample),
    .recon_x(recon_x),
    .recon_y(recon_y)
  );

  // Free-running clock, toggling every 2 time units.
  always begin
    #2 clk = ~clk;
  end

  // Row-end marker derived from the driven column and the test strip width.
  always_comb begin
    pixel_row_last = 1'b0;
    if (pixel_x == (strip_width - 13'd1)) begin
      pixel_row_last = 1'b1;
    end
  end

  // Stimulus vectors and expected neighborhoods, filled at time zero.
  initial begin
    sample_mem[0] = 8'd10; x_mem[0] = 13'd0; y_mem[0] = 13'd0; first_mem[0] = 1'b1; last_mem[0] = 1'b0;
    sample_mem[1] = 8'd20; x_mem[1] = 13'd1; y_mem[1] = 13'd0; first_mem[1] = 1'b0; last_mem[1] = 1'b0;
    sample_mem[2] = 8'd30; x_mem[2] = 13'd2; y_mem[2] = 13'd0; first_mem[2] = 1'b0; last_mem[2] = 1'b0;
    sample_mem[3] = 8'd11; x_mem[3] = 13'd0; y_mem[3] = 13'd1; first_mem[3] = 1'b0; last_mem[3] = 1'b0;
    sample_mem[4] = 8'd21; x_mem[4] = 13'd1; y_mem[4] = 13'd1; first_mem[4] = 1'b0; last_mem[4] = 1'b0;
    sample_mem[5] = 8'd31; x_mem[5] = 13'd2; y_mem[5] = 13'd1; first_mem[5] = 1'b0; last_mem[5] = 1'b0;
    sample_mem[6] = 8'd12; x_mem[6] = 13'd0; y_mem[6] = 13'd2; first_mem[6] = 1'b0; last_mem[6] = 1'b0;
    sample_mem[7] = 8'd22; x_mem[7] = 13'd1; y_mem[7] = 13'd2; first_mem[7] = 1'b0; last_mem[7] = 1'b0;
    sample_mem[8] = 8'd32; x_mem[8] = 13'd2; y_mem[8] = 13'd2; first_mem[8] = 1'b0; last_mem[8] = 1'b1;
    sample_mem[9] = 8'd42; x_mem[9] = 13'd0; y_mem[9] = 13'd3; first_mem[9] = 1'b1; last_mem[9] = 1'b0;

    // Reconstructed samples written back equal the source samples (NEAR=0).
    recon_mem[0] = 8'd10;
    recon_mem[1] = 8'd20;
    recon_mem[2] = 8'd30;
    recon_mem[3] = 8'd11;
    recon_mem[4] = 8'd21;
    recon_mem[5] = 8'd31;
    recon_mem[6] = 8'd12;
    recon_mem[7] = 8'd22;
    recon_mem[8] = 8'd32;
    recon_mem[9] = 8'd42;

    // Entry 9 opens a new strip, so its neighborhood is expected to be zero.
    exp_Ra_mem[0] = 8'd0;  exp_Rb_mem[0] = 8'd0;  exp_Rc_mem[0] = 8'd0;  exp_Rd_mem[0] = 8'd0;
    exp_Ra_mem[1] = 8'd10; exp_Rb_mem[1] = 8'd0;  exp_Rc_mem[1] = 8'd0;  exp_Rd_mem[1] = 8'd0;
    exp_Ra_mem[2] = 8'd20; exp_Rb_mem[2] = 8'd0;  exp_Rc_mem[2] = 8'd0;  exp_Rd_mem[2] = 8'd0;
    exp_Ra_mem[3] = 8'd10; exp_Rb_mem[3] = 8'd10; exp_Rc_mem[3] = 8'd0;  exp_Rd_mem[3] = 8'd20;
    exp_Ra_mem[4] = 8'd11; exp_Rb_mem[4] = 8'd20; exp_Rc_mem[4] = 8'd10; exp_Rd_mem[4] = 8'd30;
    exp_Ra_mem[5] = 8'd21; exp_Rb_mem[5] = 8'd30; exp_Rc_mem[5] = 8'd20; exp_Rd_mem[5] = 8'd30;
    exp_Ra_mem[6] = 8'd11; exp_Rb_mem[6] = 8'd11; exp_Rc_mem[6] = 8'd10; exp_Rd_mem[6] = 8'd21;
    exp_Ra_mem[7] = 8'd12; exp_Rb_mem[7] = 8'd21; exp_Rc_mem[7] = 8'd11; exp_Rd_mem[7] = 8'd31;
    exp_Ra_mem[8] = 8'd22; exp_Rb_mem[8] = 8'd31; exp_Rc_mem[8] = 8'd21; exp_Rd_mem[8] = 8'd31;
    exp_Ra_mem[9] = 8'd0;  exp_Rb_mem[9] = 8'd0;  exp_Rc_mem[9] = 8'd0;  exp_Rd_mem[9] = 8'd0;
  end

  // Stimulus: one pixel, then its reconstructed writeback, per iteration.
  initial begin
    clk = 1'b0;
    rst = 1'b1;
    pixel_valid = 1'b0;
    pixel_sample = 8'd0;
    pixel_x = 13'd0;
    pixel_y = 13'd0;
    strip_first_pixel = 1'b0;
    strip_last_pixel = 1'b0;
    strip_width = 13'd3;
    NEAR = 6'd0;
    neigh_ready = 1'b1;
    recon_valid = 1'b0;
    recon_sample = 8'd0;
    recon_x = 13'd0;
    recon_y = 13'd0;
    all_done_seen = 1'b0;

    // Reset release is not timing critical: the first stimulus only appears on
    // the following falling edge.
    repeat (5) @(posedge clk);
    rst = 1'b0;

    for (drive_index = 0; drive_index < EVENT_COUNT; drive_index = drive_index + 1) begin
      // Present the source pixel.
      @(negedge clk);
      pixel_valid = 1'b1;
      pixel_sample = sample_mem[drive_index];
      pixel_x = x_mem[drive_index];
      pixel_y = y_mem[drive_index];
      strip_first_pixel = first_mem[drive_index];
      strip_last_pixel = last_mem[drive_index];
      wait (pixel_ready);
      @(posedge clk);
      // Nonblocking release: the DUT still samples pixel_valid high on this
      // edge, so the handshake cannot be lost to process ordering.
      pixel_valid <= 1'b0;
      strip_first_pixel <= 1'b0;
      strip_last_pixel <= 1'b0;

      // Write back the reconstructed sample once the neighborhood is out.
      wait (neigh_valid);
      @(negedge clk);
      recon_valid = 1'b1;
      recon_sample = recon_mem[drive_index];
      recon_x = x_mem[drive_index];
      recon_y = y_mem[drive_index];
      wait (recon_ready);
      @(posedge clk);
      recon_valid <= 1'b0;
    end

    wait (receive_index == EVENT_COUNT);
    all_done_seen = 1'b1;
  end

  // Scoreboard: check each accepted neighborhood event against the tables.
  always_ff @(posedge clk) begin
    if (rst) begin
      receive_index <= 0;
    end else if (neigh_valid && neigh_ready) begin
      if (receive_index >= EVENT_COUNT) begin
        $fatal(1, "Unexpected extra neighbor event");
      end

      if (neigh_sample !== sample_mem[receive_index] ||
          neigh_x !== x_mem[receive_index] ||
          neigh_y !== y_mem[receive_index] ||
          neigh_strip_first_pixel !== first_mem[receive_index] ||
          neigh_strip_last_pixel !== last_mem[receive_index]) begin
        $fatal(1, "neighbor metadata mismatch at %0d", receive_index);
      end

      if (Ra !== exp_Ra_mem[receive_index] ||
          Rb !== exp_Rb_mem[receive_index] ||
          Rc !== exp_Rc_mem[receive_index] ||
          Rd !== exp_Rd_mem[receive_index]) begin
        $fatal(1, "neighbor mismatch at %0d: Ra=%0d Rb=%0d Rc=%0d Rd=%0d",
               receive_index, Ra, Rb, Rc, Rd);
      end

      receive_index <= receive_index + 1;
    end
  end

  // Watchdog: abort if the stimulus never completes.
  initial begin
    repeat (2000) @(posedge clk);
    $fatal(1, "Timeout waiting for neighbor-provider smoke");
  end

  // Report success after a short drain period.
  initial begin
    wait (all_done_seen);
    repeat (4) @(posedge clk);
    $display("PASS: tb_jls_neighbor_provider");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
219
fpga/sim/tb_jls_neighbor_provider_lossless_fast.sv
Normal file
219
fpga/sim/tb_jls_neighbor_provider_lossless_fast.sv
Normal file
@@ -0,0 +1,219 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.3 context determination, Annex A.4 prediction
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Reconstructed neighborhood selection for Ra/Rb/Rc/Rd
|
||||
// Example : NEAR=0 lossless strips may commit X as Rx without waiting for
|
||||
// reconstructed writeback.
|
||||
//
|
||||
// Smoke test for the jls_neighbor_provider lossless fast path. Six pixels
|
||||
// are accepted on consecutive cycles while recon_valid stays low; this proves
|
||||
// the NEAR=0 path does not depend on later reconstructed feedback.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_neighbor_provider_lossless_fast;

  // Smoke test for the jls_neighbor_provider lossless fast path.
  //
  // Six pixels (two rows of three) are presented on consecutive falling clock
  // edges with NEAR=0 while recon_valid is held low. The test fails if
  // pixel_ready is not a known 1 for any of them, and a scoreboard checks the
  // forwarded metadata and Ra/Rb/Rc/Rd against precomputed tables.

  // Small smoke-test precision and line width.
  localparam int PIX_WIDTH = 8;
  localparam int MAX_PIC_COL = 8;
  localparam int EVENT_COUNT = 6;

  // Main simulation clock and reset.
  logic clk;
  logic rst;

  // Source pixel interface.
  logic pixel_valid;
  logic pixel_ready;
  logic [PIX_WIDTH-1:0] pixel_sample;
  logic [12:0] pixel_x;
  logic [12:0] pixel_y;
  logic pixel_row_last;
  logic strip_first_pixel;
  logic strip_last_pixel;
  logic [12:0] strip_width;
  logic [5:0] NEAR;

  // Neighborhood event interface.
  logic neigh_valid;
  logic neigh_ready;
  logic [PIX_WIDTH-1:0] neigh_sample;
  logic [12:0] neigh_x;
  logic [12:0] neigh_y;
  logic neigh_strip_first_pixel;
  logic neigh_strip_last_pixel;
  logic [PIX_WIDTH-1:0] Ra;
  logic [PIX_WIDTH-1:0] Rb;
  logic [PIX_WIDTH-1:0] Rc;
  logic [PIX_WIDTH-1:0] Rd;

  // Reconstructed writeback interface remains idle in this test.
  logic recon_valid;
  logic recon_ready;
  logic [PIX_WIDTH-1:0] recon_sample;
  logic [12:0] recon_x;
  logic [12:0] recon_y;

  // Test vectors and scoreboard.
  logic [PIX_WIDTH-1:0] sample_mem [0:EVENT_COUNT-1];
  logic [12:0] x_mem [0:EVENT_COUNT-1];
  logic [12:0] y_mem [0:EVENT_COUNT-1];
  logic first_mem [0:EVENT_COUNT-1];
  logic last_mem [0:EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0] exp_Ra_mem [0:EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0] exp_Rb_mem [0:EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0] exp_Rc_mem [0:EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0] exp_Rd_mem [0:EVENT_COUNT-1];
  int drive_index;
  int receive_index;
  logic all_done_seen;

  jls_neighbor_provider #(
    .PIX_WIDTH(PIX_WIDTH),
    .MAX_PIC_COL(MAX_PIC_COL)
  ) dut (
    .clk(clk),
    .rst(rst),
    .pixel_valid(pixel_valid),
    .pixel_ready(pixel_ready),
    .pixel_sample(pixel_sample),
    .pixel_x(pixel_x),
    .pixel_y(pixel_y),
    .pixel_row_last(pixel_row_last),
    .strip_first_pixel(strip_first_pixel),
    .strip_last_pixel(strip_last_pixel),
    .NEAR(NEAR),
    .neigh_valid(neigh_valid),
    .neigh_ready(neigh_ready),
    .neigh_sample(neigh_sample),
    .neigh_x(neigh_x),
    .neigh_y(neigh_y),
    .neigh_strip_first_pixel(neigh_strip_first_pixel),
    .neigh_strip_last_pixel(neigh_strip_last_pixel),
    .Ra(Ra),
    .Rb(Rb),
    .Rc(Rc),
    .Rd(Rd),
    .recon_valid(recon_valid),
    .recon_ready(recon_ready),
    .recon_sample(recon_sample),
    .recon_x(recon_x),
    .recon_y(recon_y)
  );

  // Free-running clock, toggling every 2 time units.
  always begin
    #2 clk = ~clk;
  end

  // Row-end marker derived from the driven column and the test strip width.
  always_comb begin
    pixel_row_last = 1'b0;
    if (pixel_x == (strip_width - 13'd1)) begin
      pixel_row_last = 1'b1;
    end
  end

  // Stimulus vectors and expected neighborhoods, filled at time zero.
  initial begin
    sample_mem[0] = 8'd10; x_mem[0] = 13'd0; y_mem[0] = 13'd0; first_mem[0] = 1'b1; last_mem[0] = 1'b0;
    sample_mem[1] = 8'd20; x_mem[1] = 13'd1; y_mem[1] = 13'd0; first_mem[1] = 1'b0; last_mem[1] = 1'b0;
    sample_mem[2] = 8'd30; x_mem[2] = 13'd2; y_mem[2] = 13'd0; first_mem[2] = 1'b0; last_mem[2] = 1'b0;
    sample_mem[3] = 8'd11; x_mem[3] = 13'd0; y_mem[3] = 13'd1; first_mem[3] = 1'b0; last_mem[3] = 1'b0;
    sample_mem[4] = 8'd21; x_mem[4] = 13'd1; y_mem[4] = 13'd1; first_mem[4] = 1'b0; last_mem[4] = 1'b0;
    sample_mem[5] = 8'd31; x_mem[5] = 13'd2; y_mem[5] = 13'd1; first_mem[5] = 1'b0; last_mem[5] = 1'b1;

    exp_Ra_mem[0] = 8'd0;  exp_Rb_mem[0] = 8'd0;  exp_Rc_mem[0] = 8'd0;  exp_Rd_mem[0] = 8'd0;
    exp_Ra_mem[1] = 8'd10; exp_Rb_mem[1] = 8'd0;  exp_Rc_mem[1] = 8'd0;  exp_Rd_mem[1] = 8'd0;
    exp_Ra_mem[2] = 8'd20; exp_Rb_mem[2] = 8'd0;  exp_Rc_mem[2] = 8'd0;  exp_Rd_mem[2] = 8'd0;
    exp_Ra_mem[3] = 8'd10; exp_Rb_mem[3] = 8'd10; exp_Rc_mem[3] = 8'd0;  exp_Rd_mem[3] = 8'd20;
    exp_Ra_mem[4] = 8'd11; exp_Rb_mem[4] = 8'd20; exp_Rc_mem[4] = 8'd10; exp_Rd_mem[4] = 8'd30;
    exp_Ra_mem[5] = 8'd21; exp_Rb_mem[5] = 8'd30; exp_Rc_mem[5] = 8'd20; exp_Rd_mem[5] = 8'd30;
  end

  // Stimulus: back-to-back pixels with no reconstructed writeback.
  initial begin
    clk = 1'b0;
    rst = 1'b1;
    pixel_valid = 1'b0;
    pixel_sample = 8'd0;
    pixel_x = 13'd0;
    pixel_y = 13'd0;
    strip_first_pixel = 1'b0;
    strip_last_pixel = 1'b0;
    strip_width = 13'd3;
    NEAR = 6'd0;
    neigh_ready = 1'b1;
    recon_valid = 1'b0;
    recon_sample = 8'd0;
    recon_x = 13'd0;
    recon_y = 13'd0;
    all_done_seen = 1'b0;

    repeat (5) @(posedge clk);
    rst = 1'b0;

    for (drive_index = 0; drive_index < EVENT_COUNT; drive_index = drive_index + 1) begin
      @(negedge clk);
      pixel_valid = 1'b1;
      pixel_sample = sample_mem[drive_index];
      pixel_x = x_mem[drive_index];
      pixel_y = y_mem[drive_index];
      strip_first_pixel = first_mem[drive_index];
      strip_last_pixel = last_mem[drive_index];
      // Let combinational ready logic settle before sampling it.
      #1;
      // Case inequality so an unknown (X/Z) pixel_ready is reported as a stall
      // instead of silently passing the check.
      if (pixel_ready !== 1'b1) begin
        $fatal(1, "lossless fast path inserted a source stall at %0d", drive_index);
      end
      @(posedge clk);
    end

    // Release the interface on the falling edge, well away from any sampling
    // rising edge.
    @(negedge clk);
    pixel_valid = 1'b0;
    strip_first_pixel = 1'b0;
    strip_last_pixel = 1'b0;

    wait (receive_index == EVENT_COUNT);
    all_done_seen = 1'b1;
  end

  // Scoreboard: check each accepted neighborhood event against the tables.
  always_ff @(posedge clk) begin
    if (rst) begin
      receive_index <= 0;
    end else if (neigh_valid && neigh_ready) begin
      if (receive_index >= EVENT_COUNT) begin
        $fatal(1, "Unexpected extra neighbor event");
      end

      if (neigh_sample !== sample_mem[receive_index] ||
          neigh_x !== x_mem[receive_index] ||
          neigh_y !== y_mem[receive_index] ||
          neigh_strip_first_pixel !== first_mem[receive_index] ||
          neigh_strip_last_pixel !== last_mem[receive_index]) begin
        $fatal(1, "neighbor metadata mismatch at %0d", receive_index);
      end

      if (Ra !== exp_Ra_mem[receive_index] ||
          Rb !== exp_Rb_mem[receive_index] ||
          Rc !== exp_Rc_mem[receive_index] ||
          Rd !== exp_Rd_mem[receive_index]) begin
        $fatal(1, "neighbor mismatch at %0d: Ra=%0d Rb=%0d Rc=%0d Rd=%0d",
               receive_index, Ra, Rb, Rc, Rd);
      end

      receive_index <= receive_index + 1;
    end
  end

  // Watchdog: abort if the stimulus never completes.
  initial begin
    repeat (2000) @(posedge clk);
    $fatal(1, "Timeout waiting for neighbor-provider lossless-fast smoke");
  end

  // Report success after a short drain period.
  initial begin
    wait (all_done_seen);
    repeat (4) @(posedge clk);
    $display("PASS: tb_jls_neighbor_provider_lossless_fast");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
305
fpga/sim/tb_jls_neighbor_provider_near_bypass.sv
Normal file
305
fpga/sim/tb_jls_neighbor_provider_near_bypass.sv
Normal file
@@ -0,0 +1,305 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.3 context determination, Annex A.4 prediction
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Reconstructed neighborhood selection for Ra/Rb/Rc/Rd
|
||||
// Example : In NEAR>0, a same-row pixel uses the previous pixel's returned
|
||||
// Rx as Ra after the Rx writeback is committed.
|
||||
//
|
||||
// Smoke test for the NEAR>0 reconstructed-sample writeback boundary. The test
|
||||
// accepts x=1 one cycle after x=0 writeback, and x=2 one cycle after x=1
|
||||
// writeback. It also proves that x=0 of the next row waits for the row-last
|
||||
// writeback before the row transition is accepted.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_neighbor_provider_near_bypass;

  // Scenario geometry: 8-bit samples, a three-pixel first row followed by the
  // first pixel of the second row, i.e. four neighbor events in total.
  localparam int PIX_WIDTH   = 8;
  localparam int MAX_PIC_COL = 8;
  localparam int EVENT_COUNT = 4;

  // Clock and synchronous reset.
  logic clk;
  logic rst;

  // Source pixel stream driven into the DUT.
  logic                 pixel_valid;
  logic                 pixel_ready;
  logic [PIX_WIDTH-1:0] pixel_sample;
  logic [12:0]          pixel_x;
  logic [12:0]          pixel_y;
  logic                 pixel_row_last;
  logic                 strip_first_pixel;
  logic                 strip_last_pixel;
  logic [12:0]          strip_width;
  logic [5:0]           NEAR;

  // Neighbor event produced by the DUT.
  logic                 neigh_valid;
  logic                 neigh_ready;
  logic [PIX_WIDTH-1:0] neigh_sample;
  logic [12:0]          neigh_x;
  logic [12:0]          neigh_y;
  logic                 neigh_strip_first_pixel;
  logic                 neigh_strip_last_pixel;
  logic [PIX_WIDTH-1:0] Ra;
  logic [PIX_WIDTH-1:0] Rb;
  logic [PIX_WIDTH-1:0] Rc;
  logic [PIX_WIDTH-1:0] Rd;

  // Reconstructed-sample (Rx) writeback channel driven by the testbench.
  logic                 recon_valid;
  logic                 recon_ready;
  logic [PIX_WIDTH-1:0] recon_sample;
  logic [12:0]          recon_x;
  logic [12:0]          recon_y;

  // Stimulus tables and the expected neighborhood for every event.
  logic [PIX_WIDTH-1:0] sample_mem [0:EVENT_COUNT-1];
  logic [12:0]          x_mem      [0:EVENT_COUNT-1];
  logic [12:0]          y_mem      [0:EVENT_COUNT-1];
  logic                 first_mem  [0:EVENT_COUNT-1];
  logic                 last_mem   [0:EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0] exp_Ra_mem [0:EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0] exp_Rb_mem [0:EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0] exp_Rc_mem [0:EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0] exp_Rd_mem [0:EVENT_COUNT-1];

  // Scoreboard progress.
  int   receive_index;
  logic all_done_seen;

  jls_neighbor_provider #(
    .PIX_WIDTH(PIX_WIDTH),
    .MAX_PIC_COL(MAX_PIC_COL)
  ) dut (
    .clk(clk),
    .rst(rst),
    .pixel_valid(pixel_valid),
    .pixel_ready(pixel_ready),
    .pixel_sample(pixel_sample),
    .pixel_x(pixel_x),
    .pixel_y(pixel_y),
    .pixel_row_last(pixel_row_last),
    .strip_first_pixel(strip_first_pixel),
    .strip_last_pixel(strip_last_pixel),
    .NEAR(NEAR),
    .neigh_valid(neigh_valid),
    .neigh_ready(neigh_ready),
    .neigh_sample(neigh_sample),
    .neigh_x(neigh_x),
    .neigh_y(neigh_y),
    .neigh_strip_first_pixel(neigh_strip_first_pixel),
    .neigh_strip_last_pixel(neigh_strip_last_pixel),
    .Ra(Ra),
    .Rb(Rb),
    .Rc(Rc),
    .Rd(Rd),
    .recon_valid(recon_valid),
    .recon_ready(recon_ready),
    .recon_sample(recon_sample),
    .recon_x(recon_x),
    .recon_y(recon_y)
  );

  // Free-running clock, period of four time units.
  always #2 clk = ~clk;

  // Row-last flag is derived from the column: the last column of the strip is
  // strip_width - 1.
  always_comb begin
    if (pixel_x == (strip_width - 13'd1)) begin
      pixel_row_last = 1'b1;
    end else begin
      pixel_row_last = 1'b0;
    end
  end

  // Stimulus and expectation tables, filled one array at a time.
  initial begin
    // Row 0 supplies x = 0..2; row 1 supplies x = 0 only.
    sample_mem[0] = 8'd10;  sample_mem[1] = 8'd20;
    sample_mem[2] = 8'd30;  sample_mem[3] = 8'd40;

    x_mem[0] = 13'd0;  x_mem[1] = 13'd1;  x_mem[2] = 13'd2;  x_mem[3] = 13'd0;
    y_mem[0] = 13'd0;  y_mem[1] = 13'd0;  y_mem[2] = 13'd0;  y_mem[3] = 13'd1;

    // Only the very first event opens the strip and only the last closes it.
    first_mem[0] = 1'b1;  first_mem[1] = 1'b0;  first_mem[2] = 1'b0;  first_mem[3] = 1'b0;
    last_mem[0]  = 1'b0;  last_mem[1]  = 1'b0;  last_mem[2]  = 1'b0;  last_mem[3]  = 1'b1;

    // Ra follows the written-back Rx values (101, 102), not the source samples.
    exp_Ra_mem[0] = 8'd0;  exp_Ra_mem[1] = 8'd101;
    exp_Ra_mem[2] = 8'd102;  exp_Ra_mem[3] = 8'd101;

    // The upper neighbors are zero on row 0 and come from row 0 on row 1.
    exp_Rb_mem[0] = 8'd0;  exp_Rb_mem[1] = 8'd0;  exp_Rb_mem[2] = 8'd0;  exp_Rb_mem[3] = 8'd101;
    exp_Rc_mem[0] = 8'd0;  exp_Rc_mem[1] = 8'd0;  exp_Rc_mem[2] = 8'd0;  exp_Rc_mem[3] = 8'd0;
    exp_Rd_mem[0] = 8'd0;  exp_Rd_mem[1] = 8'd0;  exp_Rd_mem[2] = 8'd0;  exp_Rd_mem[3] = 8'd102;
  end

  // Zero-time helper: present table entry idx on the source pixel interface.
  // The strip first/last flags are handled by the caller.
  task automatic present_pixel(input int idx);
    pixel_valid  = 1'b1;
    pixel_sample = sample_mem[idx];
    pixel_x      = x_mem[idx];
    pixel_y      = y_mem[idx];
  endtask

  // Zero-time helper: present a reconstructed value for the coordinates of
  // table entry idx on the writeback interface.
  task automatic present_recon(input logic [PIX_WIDTH-1:0] value, input int idx);
    recon_valid  = 1'b1;
    recon_sample = value;
    recon_x      = x_mem[idx];
    recon_y      = y_mem[idx];
  endtask

  // Directed stimulus. Inputs change on the falling edge (or one time unit
  // after the rising edge) and handshake outputs are inspected one time unit
  // later, so nothing is sampled in the same instant it is driven.
  initial begin
    clk               = 1'b0;
    rst               = 1'b1;
    pixel_valid       = 1'b0;
    pixel_sample      = 8'd0;
    pixel_x           = 13'd0;
    pixel_y           = 13'd0;
    strip_first_pixel = 1'b0;
    strip_last_pixel  = 1'b0;
    strip_width       = 13'd3;
    NEAR              = 6'd1;
    neigh_ready       = 1'b1;
    recon_valid       = 1'b0;
    recon_sample      = 8'd0;
    recon_x           = 13'd0;
    recon_y           = 13'd0;
    all_done_seen     = 1'b0;

    repeat (5) @(posedge clk);
    rst = 1'b0;

    // ---- (x=0, y=0): nothing is pending, so it must be taken at once. ----
    @(negedge clk);
    present_pixel(0);
    strip_first_pixel = first_mem[0];
    strip_last_pixel  = last_mem[0];
    #1;
    if (!pixel_ready) begin
      $fatal(1, "first pixel was not accepted");
    end
    @(posedge clk);
    #1;
    pixel_valid       = 1'b0;
    strip_first_pixel = 1'b0;

    // ---- (x=1, y=0) offered together with the writeback of x=0. ----
    @(negedge clk);
    present_recon(8'd101, 0);
    present_pixel(1);
    #1;
    if (!recon_ready) begin
      $fatal(1, "x=0 writeback was not ready");
    end
    if (pixel_ready) begin
      $fatal(1, "x=1 was accepted before x=0 writeback committed");
    end
    @(posedge clk);
    #1;
    recon_valid = 1'b0;
    @(negedge clk);
    #1;
    if (!pixel_ready) begin
      $fatal(1, "x=1 was not accepted after x=0 writeback committed");
    end
    @(posedge clk);
    #1;
    pixel_valid = 1'b0;

    // ---- (x=2, y=0) offered together with the writeback of x=1. ----
    @(negedge clk);
    present_recon(8'd102, 1);
    present_pixel(2);
    #1;
    if (!recon_ready) begin
      $fatal(1, "x=1 writeback was not ready");
    end
    if (pixel_ready) begin
      $fatal(1, "x=2 was accepted before x=1 writeback committed");
    end
    @(posedge clk);
    #1;
    recon_valid = 1'b0;
    @(negedge clk);
    #1;
    if (!pixel_ready) begin
      $fatal(1, "x=2 was not accepted after x=1 writeback committed");
    end
    @(posedge clk);
    #1;
    pixel_valid = 1'b0;

    // ---- (x=0, y=1) offered together with the row-last writeback. ----
    @(negedge clk);
    present_recon(8'd103, 2);
    present_pixel(3);
    strip_last_pixel = last_mem[3];
    #1;
    if (!recon_ready) begin
      $fatal(1, "row-last writeback was not ready");
    end
    if (pixel_ready) begin
      $fatal(1, "row transition was incorrectly bypassed");
    end
    @(posedge clk);
    #1;
    recon_valid = 1'b0;

    @(negedge clk);
    #1;
    if (!pixel_ready) begin
      $fatal(1, "next row first pixel was not accepted after row transition");
    end
    @(posedge clk);
    #1;
    pixel_valid      = 1'b0;
    strip_last_pixel = 1'b0;

    // ---- Final writeback for (x=0, y=1); no further pixel is offered. ----
    @(negedge clk);
    present_recon(8'd104, 3);
    #1;
    if (!recon_ready) begin
      $fatal(1, "final writeback was not ready");
    end
    @(posedge clk);
    #1;
    recon_valid = 1'b0;

    // Hand over to the PASS reporter once every event has been checked.
    wait (receive_index == EVENT_COUNT);
    all_done_seen = 1'b1;
  end

  // Scoreboard: each accepted neighbor event is compared against the tables,
  // in order. Any deviation ends the simulation immediately.
  always_ff @(posedge clk) begin
    if (rst) begin
      receive_index <= 0;
    end else if (neigh_valid && neigh_ready) begin
      if (receive_index >= EVENT_COUNT) begin
        $fatal(1, "Unexpected extra neighbor event");
      end

      // Pass-through metadata must come out exactly as it went in.
      if (!((neigh_sample            === sample_mem[receive_index]) &&
            (neigh_x                 === x_mem[receive_index])      &&
            (neigh_y                 === y_mem[receive_index])      &&
            (neigh_strip_first_pixel === first_mem[receive_index])  &&
            (neigh_strip_last_pixel  === last_mem[receive_index]))) begin
        $fatal(1, "neighbor metadata mismatch at %0d", receive_index);
      end

      // Reconstructed neighborhood must match the expected Ra/Rb/Rc/Rd.
      if (!((Ra === exp_Ra_mem[receive_index]) &&
            (Rb === exp_Rb_mem[receive_index]) &&
            (Rc === exp_Rc_mem[receive_index]) &&
            (Rd === exp_Rd_mem[receive_index]))) begin
        $fatal(1, "neighbor mismatch at %0d: Ra=%0d Rb=%0d Rc=%0d Rd=%0d",
               receive_index, Ra, Rb, Rc, Rd);
      end

      receive_index <= receive_index + 1;
    end
  end

  // Watchdog: abort if the scenario has not finished within 2000 clocks.
  initial begin
    repeat (2000) @(posedge clk);
    $fatal(1, "Timeout waiting for neighbor-provider near-bypass smoke");
  end

  // PASS reporter: let a few more clocks elapse so that an unexpected extra
  // event would still be caught by the scoreboard before finishing.
  initial begin
    wait (all_done_seen);
    repeat (4) @(posedge clk);
    $display("PASS: tb_jls_neighbor_provider_near_bypass");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
167
fpga/sim/tb_jls_output_buffer.sv
Normal file
167
fpga/sim/tb_jls_output_buffer.sv
Normal file
@@ -0,0 +1,167 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.1-C.4 marker stream byte order
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Encoded byte stream delivery after JPEG-LS bit packing
|
||||
// Example : Checks byte order and ofifo_wdata[8] placement.
|
||||
//
|
||||
// Smoke test for jls_output_buffer.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_output_buffer;

  // Small buffer for fast smoke simulation.
  localparam int OUT_BUF_BYTES        = 8;
  localparam int OUT_BUF_AFULL_MARGIN = 2;
  localparam int TEST_BYTE_COUNT      = 6;

  // Main simulation clock and reset.
  logic clk;
  logic rst;

  // Encoded byte event interface.
  logic       byte_valid;
  logic       byte_ready;
  logic [7:0] byte_data;
  logic       original_image_start;
  logic       byte_accepted;
  logic       pause_req;
  logic [$clog2(OUT_BUF_BYTES + 1)-1:0] buffer_level;

  // Output FIFO interface under test.
  logic       ofifo_wclk;
  logic       ofifo_wr;
  logic [8:0] ofifo_wdata;
  logic       ofifo_full;
  logic       ofifo_alfull;

  // Test input and expected output streams.
  logic [7:0] input_byte_mem    [0:TEST_BYTE_COUNT-1];
  logic       input_start_mem   [0:TEST_BYTE_COUNT-1];
  logic [8:0] expected_word_mem [0:TEST_BYTE_COUNT-1];
  int         send_index;
  int         receive_index;
  logic       done_seen;

  jls_output_buffer #(
    .OUT_BUF_BYTES(OUT_BUF_BYTES),
    .OUT_BUF_AFULL_MARGIN(OUT_BUF_AFULL_MARGIN)
  ) dut (
    .clk(clk),
    .rst(rst),
    .byte_valid(byte_valid),
    .byte_ready(byte_ready),
    .byte_data(byte_data),
    .original_image_start(original_image_start),
    .byte_accepted(byte_accepted),
    .pause_req(pause_req),
    .buffer_level(buffer_level),
    .ofifo_wclk(ofifo_wclk),
    .ofifo_wr(ofifo_wr),
    .ofifo_wdata(ofifo_wdata),
    .ofifo_full(ofifo_full),
    .ofifo_alfull(ofifo_alfull)
  );

  // Free-running clock, period of four time units.
  always begin
    #2 clk = ~clk;
  end

  // Input bytes, their image-start flags, and the 9-bit words expected on the
  // output FIFO: bit 8 carries the image-start flag, bits 7:0 the byte.
  initial begin
    input_byte_mem[0] = 8'hFF;
    input_byte_mem[1] = 8'hD8;
    input_byte_mem[2] = 8'h12;
    input_byte_mem[3] = 8'h34;
    input_byte_mem[4] = 8'h56;
    input_byte_mem[5] = 8'hD9;

    input_start_mem[0] = 1'b1;
    input_start_mem[1] = 1'b0;
    input_start_mem[2] = 1'b0;
    input_start_mem[3] = 1'b0;
    input_start_mem[4] = 1'b0;
    input_start_mem[5] = 1'b0;

    expected_word_mem[0] = 9'h1FF;
    expected_word_mem[1] = 9'h0D8;
    expected_word_mem[2] = 9'h012;
    expected_word_mem[3] = 9'h034;
    expected_word_mem[4] = 9'h056;
    expected_word_mem[5] = 9'h0D9;
  end

  // Stimulus. Every input, including the reset release, changes on the falling
  // clock edge. Driving with blocking assignments on the rising edge would race
  // with the DUT and scoreboard processes that sample on that same edge, making
  // the accepted cycle simulator-dependent.
  initial begin
    clk                  = 1'b0;
    rst                  = 1'b1;
    byte_valid           = 1'b0;
    byte_data            = 8'h00;
    original_image_start = 1'b0;
    ofifo_full           = 1'b0;
    ofifo_alfull         = 1'b0;

    // Hold reset across five rising edges, then release it mid-cycle.
    repeat (5) @(posedge clk);
    @(negedge clk);
    rst = 1'b0;

    // Stream the bytes back to back, one per clock. byte_ready is not consulted
    // here; the scoreboard fails the test if it ever drops while valid is high.
    for (send_index = 0; send_index < TEST_BYTE_COUNT; send_index = send_index + 1) begin
      @(negedge clk);
      byte_valid           = 1'b1;
      byte_data            = input_byte_mem[send_index];
      original_image_start = input_start_mem[send_index];
    end

    // Return the interface to idle after the last byte has been sampled.
    @(negedge clk);
    byte_valid           = 1'b0;
    byte_data            = 8'h00;
    original_image_start = 1'b0;
  end

  // Clock-forwarding check. The comparison is made one time unit after the
  // rising edge so that the DUT's clock-forwarding assignment has settled;
  // comparing in the same instant as the edge is order-dependent if the DUT
  // forwards the clock with a continuous assignment (DUT source not visible
  // here - TODO confirm).
  always @(posedge clk) begin
    #1;
    if (!rst && (ofifo_wclk !== clk)) begin
      $fatal(1, "ofifo_wclk is not tied to clk");
    end
  end

  // Scoreboard: checks the ready handshake and every word written to the
  // output FIFO, in order.
  always_ff @(posedge clk) begin
    if (rst) begin
      receive_index <= 0;
      done_seen     <= 1'b0;
    end else begin
      // The smoke test never fills the buffer, so a stall is an error.
      if (byte_valid && !byte_ready) begin
        $fatal(1, "byte_ready unexpectedly deasserted in smoke test");
      end

      if (ofifo_wr) begin
        if (receive_index >= TEST_BYTE_COUNT) begin
          $fatal(1, "Unexpected extra output word 0x%03h", ofifo_wdata);
        end

        if (ofifo_wdata !== expected_word_mem[receive_index]) begin
          $fatal(1, "ofifo word mismatch at %0d: got 0x%03h expected 0x%03h",
                 receive_index, ofifo_wdata, expected_word_mem[receive_index]);
        end

        receive_index <= receive_index + 1;
        // Flag completion on the word that fills the last expected slot.
        if (receive_index == (TEST_BYTE_COUNT - 1)) begin
          done_seen <= 1'b1;
        end
      end
    end
  end

  // Watchdog: abort if the stream has not been delivered within 1000 clocks.
  initial begin
    repeat (1000) @(posedge clk);
    $fatal(1, "Timeout waiting for output buffer stream");
  end

  // PASS reporter: wait a few extra clocks so a spurious extra word would be
  // caught, then confirm the final count.
  initial begin
    wait (done_seen);
    repeat (4) @(posedge clk);
    if (receive_index !== TEST_BYTE_COUNT) begin
      $fatal(1, "receive_index mismatch: got %0d", receive_index);
    end
    $display("PASS: tb_jls_output_buffer");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
238
fpga/sim/tb_jls_prediction_corrector.sv
Normal file
238
fpga/sim/tb_jls_prediction_corrector.sv
Normal file
@@ -0,0 +1,238 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 prediction error encoding, Annex A.6 bias variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Px correction by C[Q] followed by bounds correction
|
||||
// Example : Checks normal correction and 0..MAXVAL saturation.
|
||||
//
|
||||
// Smoke test for jls_prediction_corrector.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_prediction_corrector;

  // Test precision.
  localparam int PIX_WIDTH = 8;

  // Expected corrected events.
  localparam int EXPECTED_EVENT_COUNT = 4;

  // Main simulation clock and reset.
  logic clk;
  logic rst;

  // Input context event.
  logic                 context_valid;
  logic                 context_ready;
  logic [PIX_WIDTH-1:0] context_sample;
  logic [12:0]          context_x;
  logic [12:0]          context_y;
  logic                 context_strip_first_pixel;
  logic                 context_strip_last_pixel;
  logic [PIX_WIDTH-1:0] Px;
  logic [31:0]          A;
  logic signed [31:0]   B;
  logic signed [8:0]    C;
  logic [15:0]          N;
  logic                 context_negative;
  logic [8:0]           context_index;
  logic                 run_mode_context;

  // Corrected output event.
  logic                 corrected_valid;
  logic                 corrected_ready;
  logic [PIX_WIDTH-1:0] corrected_sample;
  logic [12:0]          corrected_x;
  logic [12:0]          corrected_y;
  logic                 corrected_strip_first_pixel;
  logic                 corrected_strip_last_pixel;
  logic [PIX_WIDTH-1:0] corrected_Px;
  logic [8:0]           corrected_context_index;
  logic                 corrected_context_negative;
  logic                 corrected_run_mode_context;
  logic [31:0]          corrected_A;
  logic signed [31:0]   corrected_B;
  logic signed [8:0]    corrected_C;
  logic [15:0]          corrected_N;

  // Scoreboard state.
  logic [PIX_WIDTH-1:0] expected_px_mem [0:EXPECTED_EVENT_COUNT-1];
  int                   receive_index;
  logic                 all_done_seen;

  jls_prediction_corrector #(
    .PIX_WIDTH(PIX_WIDTH)
  ) dut (
    .clk(clk),
    .rst(rst),
    .context_valid(context_valid),
    .context_ready(context_ready),
    .context_sample(context_sample),
    .context_x(context_x),
    .context_y(context_y),
    .context_strip_first_pixel(context_strip_first_pixel),
    .context_strip_last_pixel(context_strip_last_pixel),
    .Px(Px),
    .A(A),
    .B(B),
    .C(C),
    .N(N),
    .context_negative(context_negative),
    .context_index(context_index),
    .run_mode_context(run_mode_context),
    .corrected_valid(corrected_valid),
    .corrected_ready(corrected_ready),
    .corrected_sample(corrected_sample),
    .corrected_x(corrected_x),
    .corrected_y(corrected_y),
    .corrected_strip_first_pixel(corrected_strip_first_pixel),
    .corrected_strip_last_pixel(corrected_strip_last_pixel),
    .corrected_Px(corrected_Px),
    .corrected_context_index(corrected_context_index),
    .corrected_context_negative(corrected_context_negative),
    .corrected_run_mode_context(corrected_run_mode_context),
    .corrected_A(corrected_A),
    .corrected_B(corrected_B),
    .corrected_C(corrected_C),
    .corrected_N(corrected_N)
  );

  // Free-running clock, period of four time units.
  always begin
    #2 clk = ~clk;
  end

  // Expected corrected predictions, one per stimulus event (see the calls to
  // send_context below for the inputs that produce them).
  initial begin
    expected_px_mem[0] = 8'd17;   // 20 + (-3)
    expected_px_mem[1] = 8'd23;   // 20 - (-3), negative-context sign flip
    expected_px_mem[2] = 8'd0;    // 2 + (-5) saturates at 0
    expected_px_mem[3] = 8'd255;  // 250 + 10 saturates at MAXVAL
  end

  // Presents one context event for exactly one rising clock edge.
  //
  // The event is driven on a falling edge and withdrawn on the next falling
  // edge, so exactly one sampling edge sees context_valid high. Driving with
  // blocking assignments on the rising edge itself would race with the DUT
  // process that samples on that edge.
  //
  //   sample    - pass-through sample value
  //   x_pos     - pass-through column (the row is fixed at 2 for this test)
  //   is_first  - strip-first flag for this event
  //   is_last   - strip-last flag for this event
  //   px_value  - uncorrected prediction Px
  //   c_value   - bias correction C[Q]
  //   negative  - context sign flag
  //   index     - context index
  task automatic send_context(
      input logic [PIX_WIDTH-1:0] sample,
      input logic [12:0]          x_pos,
      input logic                 is_first,
      input logic                 is_last,
      input logic [PIX_WIDTH-1:0] px_value,
      input logic signed [8:0]    c_value,
      input logic                 negative,
      input logic [8:0]           index
  );
    wait (context_ready);
    @(negedge clk);
    context_valid             = 1'b1;
    context_sample            = sample;
    context_x                 = x_pos;
    context_y                 = 13'd2;
    context_strip_first_pixel = is_first;
    context_strip_last_pixel  = is_last;
    Px                        = px_value;
    C                         = c_value;
    context_negative          = negative;
    context_index             = index;

    @(negedge clk);
    context_valid             = 1'b0;
    context_strip_first_pixel = 1'b0;
    context_strip_last_pixel  = 1'b0;
  endtask

  // Stimulus: four single-cycle context events covering normal correction,
  // the negative-context sign flip, and saturation at both bounds.
  initial begin
    clk                       = 1'b0;
    rst                       = 1'b1;
    context_valid             = 1'b0;
    context_sample            = 8'd0;
    context_x                 = 13'd0;
    context_y                 = 13'd0;
    context_strip_first_pixel = 1'b0;
    context_strip_last_pixel  = 1'b0;
    Px                        = 8'd0;
    A                         = 32'd4;
    B                         = 32'sd0;
    C                         = 9'sd0;
    N                         = 16'd1;
    context_negative          = 1'b0;
    context_index             = 9'd0;
    run_mode_context          = 1'b0;
    corrected_ready           = 1'b1;
    all_done_seen             = 1'b0;

    // Hold reset across five rising edges, then release it mid-cycle so the
    // release cannot race with the DUT's sampling edge.
    repeat (5) @(posedge clk);
    @(negedge clk);
    rst = 1'b0;

    //           sample  x      first last  Px      C       neg   index
    send_context(8'd100, 13'd1, 1'b1, 1'b0, 8'd20,  -9'sd3, 1'b0, 9'd7);
    send_context(8'd101, 13'd2, 1'b0, 1'b0, 8'd20,  -9'sd3, 1'b1, 9'd8);
    send_context(8'd102, 13'd3, 1'b0, 1'b0, 8'd2,   -9'sd5, 1'b0, 9'd9);
    send_context(8'd103, 13'd4, 1'b0, 1'b1, 8'd250, 9'sd10, 1'b0, 9'd10);

    wait (receive_index == EXPECTED_EVENT_COUNT);
    all_done_seen = 1'b1;
  end

  // Scoreboard: checks each accepted corrected event in order.
  always_ff @(posedge clk) begin
    if (rst) begin
      receive_index <= 0;
    end else if (corrected_valid && corrected_ready) begin
      if (receive_index >= EXPECTED_EVENT_COUNT) begin
        $fatal(1, "Unexpected extra corrected-prediction event");
      end

      if (corrected_Px !== expected_px_mem[receive_index]) begin
        $fatal(1, "corrected_Px mismatch at %0d: got %0d expected %0d",
               receive_index, corrected_Px, expected_px_mem[receive_index]);
      end

      // Samples were sent as 100, 101, 102, 103.
      if (corrected_sample !== (8'd100 + receive_index[7:0])) begin
        $fatal(1, "corrected_sample mismatch at %0d", receive_index);
      end

      // Context indices were sent as 7, 8, 9, 10.
      if (corrected_context_index !== (9'd7 + receive_index[8:0])) begin
        $fatal(1, "corrected_context_index mismatch at %0d", receive_index);
      end

      if (receive_index == 0 && corrected_strip_first_pixel !== 1'b1) begin
        $fatal(1, "first event should preserve strip_first_pixel");
      end

      if (receive_index == 3 && corrected_strip_last_pixel !== 1'b1) begin
        $fatal(1, "last event should preserve strip_last_pixel");
      end

      receive_index <= receive_index + 1;
    end
  end

  // Watchdog: abort if the scenario has not finished within 2000 clocks.
  initial begin
    repeat (2000) @(posedge clk);
    $fatal(1, "Timeout waiting for prediction corrector smoke");
  end

  // PASS reporter: wait a few extra clocks so a spurious extra event would
  // still be caught by the scoreboard.
  initial begin
    wait (all_done_seen);
    repeat (4) @(posedge clk);
    $display("PASS: tb_jls_prediction_corrector");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
239
fpga/sim/tb_jls_predictor.sv
Normal file
239
fpga/sim/tb_jls_predictor.sv
Normal file
@@ -0,0 +1,239 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.4 prediction
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : MED predictor / Px calculation from Ra, Rb, and Rc
|
||||
// Example : Covers Rc inside, above, and below the Ra/Rb interval.
|
||||
//
|
||||
// Smoke test for jls_predictor.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_predictor;

  // Test precision.
  localparam int PIX_WIDTH = 16;

  // Expected predicted events.
  localparam int EXPECTED_EVENT_COUNT = 4;

  // Main simulation clock and reset.
  logic clk;
  logic rst;

  // Input pixel/neighborhood interface.
  logic                 pixel_valid;
  logic                 pixel_ready;
  logic [PIX_WIDTH-1:0] pixel_sample;
  logic [12:0]          pixel_x;
  logic [12:0]          pixel_y;
  logic                 strip_first_pixel;
  logic                 strip_last_pixel;
  logic [PIX_WIDTH-1:0] Ra;
  logic [PIX_WIDTH-1:0] Rb;
  logic [PIX_WIDTH-1:0] Rc;
  logic [PIX_WIDTH-1:0] Rd;

  // Output predicted event.
  logic                 predict_valid;
  logic                 predict_ready;
  logic [PIX_WIDTH-1:0] predict_sample;
  logic [12:0]          predict_x;
  logic [12:0]          predict_y;
  logic                 predict_strip_first_pixel;
  logic                 predict_strip_last_pixel;
  logic [PIX_WIDTH-1:0] predict_Ra;
  logic [PIX_WIDTH-1:0] predict_Rb;
  logic [PIX_WIDTH-1:0] predict_Rc;
  logic [PIX_WIDTH-1:0] predict_Rd;
  logic [PIX_WIDTH-1:0] Px;

  // Scoreboard state.
  logic [PIX_WIDTH-1:0] expected_px_mem     [0:EXPECTED_EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0] expected_sample_mem [0:EXPECTED_EVENT_COUNT-1];
  int                   receive_index;
  logic                 all_done_seen;

  jls_predictor #(
    .PIX_WIDTH(PIX_WIDTH)
  ) dut (
    .clk(clk),
    .rst(rst),
    .pixel_valid(pixel_valid),
    .pixel_ready(pixel_ready),
    .pixel_sample(pixel_sample),
    .pixel_x(pixel_x),
    .pixel_y(pixel_y),
    .strip_first_pixel(strip_first_pixel),
    .strip_last_pixel(strip_last_pixel),
    .Ra(Ra),
    .Rb(Rb),
    .Rc(Rc),
    .Rd(Rd),
    .predict_valid(predict_valid),
    .predict_ready(predict_ready),
    .predict_sample(predict_sample),
    .predict_x(predict_x),
    .predict_y(predict_y),
    .predict_strip_first_pixel(predict_strip_first_pixel),
    .predict_strip_last_pixel(predict_strip_last_pixel),
    .predict_Ra(predict_Ra),
    .predict_Rb(predict_Rb),
    .predict_Rc(predict_Rc),
    .predict_Rd(predict_Rd),
    .Px(Px)
  );

  // Free-running clock, period of four time units.
  always begin
    #2 clk = ~clk;
  end

  // Expected MED predictions and pass-through samples, one pair per event.
  initial begin
    expected_px_mem[0]     = 16'd15;   // Rc inside [Ra, Rb]: Ra + Rb - Rc
    expected_sample_mem[0] = 16'd100;
    expected_px_mem[1]     = 16'd10;   // Rc >= max(Ra, Rb): min(Ra, Rb)
    expected_sample_mem[1] = 16'd101;
    expected_px_mem[2]     = 16'd20;   // Rc <= min(Ra, Rb): max(Ra, Rb)
    expected_sample_mem[2] = 16'd102;
    expected_px_mem[3]     = 16'd15;   // Rc inside [Rb, Ra]: Ra + Rb - Rc
    expected_sample_mem[3] = 16'd103;
  end

  // Zero-time helper: presents one pixel and its neighborhood on the input
  // interface. The row is fixed at 2 for this test; the strip first/last
  // flags are handled by the caller.
  task automatic present_pixel(
      input logic [PIX_WIDTH-1:0] sample,
      input logic [12:0]          x_pos,
      input logic [PIX_WIDTH-1:0] ra_value,
      input logic [PIX_WIDTH-1:0] rb_value,
      input logic [PIX_WIDTH-1:0] rc_value,
      input logic [PIX_WIDTH-1:0] rd_value
  );
    pixel_valid  = 1'b1;
    pixel_sample = sample;
    pixel_x      = x_pos;
    pixel_y      = 13'd2;
    Ra           = ra_value;
    Rb           = rb_value;
    Rc           = rc_value;
    Rd           = rd_value;
  endtask

  // Stimulus. Every input, including predict_ready and the reset release,
  // changes on the falling clock edge. Changing them with blocking
  // assignments on the rising edge would race with the DUT and scoreboard
  // processes that sample on that edge, making the accepted cycle
  // simulator-dependent.
  initial begin
    clk               = 1'b0;
    rst               = 1'b1;
    pixel_valid       = 1'b0;
    pixel_sample      = 16'd0;
    pixel_x           = 13'd0;
    pixel_y           = 13'd0;
    strip_first_pixel = 1'b0;
    strip_last_pixel  = 1'b0;
    Ra                = 16'd0;
    Rb                = 16'd0;
    Rc                = 16'd0;
    Rd                = 16'd0;
    predict_ready     = 1'b1;
    all_done_seen     = 1'b0;

    // Hold reset across five rising edges, then release it mid-cycle.
    repeat (5) @(posedge clk);
    @(negedge clk);
    rst = 1'b0;

    // Event 0: Rc lies between Ra and Rb.
    wait (pixel_ready);
    @(negedge clk);
    present_pixel(16'd100, 13'd1, 16'd10, 16'd20, 16'd15, 16'd22);
    strip_first_pixel = 1'b1;
    strip_last_pixel  = 1'b0;
    @(negedge clk);
    pixel_valid       = 1'b0;
    strip_first_pixel = 1'b0;

    // Hold the first prediction event in the output register and prove the
    // local two-entry queue accepts one more MED prediction before applying
    // backpressure. This intentionally breaks downstream ready fan-in from
    // the upstream mode-router path.
    predict_ready = 1'b0;

    // Event 1: Rc above both Ra and Rb, offered while downstream is stalled.
    @(negedge clk);
    present_pixel(16'd101, 13'd2, 16'd10, 16'd20, 16'd25, 16'd30);
    #1;  // let pixel_ready settle before inspecting it
    if (pixel_ready !== 1'b1) begin
      $fatal(1, "pixel_ready should accept one queued prediction while downstream is stalled");
    end
    @(posedge clk);
    @(negedge clk);
    pixel_valid = 1'b0;
    #1;  // let pixel_ready settle before inspecting it
    if (pixel_ready !== 1'b0) begin
      $fatal(1, "pixel_ready should be low after the predictor queue becomes full");
    end

    // Release the stall and let the queue drain for one clock.
    predict_ready = 1'b1;
    wait (pixel_ready);
    @(posedge clk);

    // Event 2: Rc below both Ra and Rb.
    wait (pixel_ready);
    @(negedge clk);
    present_pixel(16'd102, 13'd3, 16'd10, 16'd20, 16'd5, 16'd6);
    @(negedge clk);
    pixel_valid = 1'b0;

    // Event 3: Rc between Rb and Ra (Ra > Rb); closes the strip.
    wait (pixel_ready);
    @(negedge clk);
    present_pixel(16'd103, 13'd4, 16'd30, 16'd5, 16'd20, 16'd7);
    strip_last_pixel = 1'b1;
    @(negedge clk);
    pixel_valid      = 1'b0;
    strip_last_pixel = 1'b0;

    wait (receive_index == EXPECTED_EVENT_COUNT);
    all_done_seen = 1'b1;
  end

  // Scoreboard: checks each accepted prediction event in order.
  always_ff @(posedge clk) begin
    if (rst) begin
      receive_index <= 0;
    end else if (predict_valid && predict_ready) begin
      if (receive_index >= EXPECTED_EVENT_COUNT) begin
        $fatal(1, "Unexpected extra predicted event");
      end

      if (Px !== expected_px_mem[receive_index]) begin
        $fatal(1, "Px mismatch at %0d: got %0d expected %0d",
               receive_index, Px, expected_px_mem[receive_index]);
      end

      if (predict_sample !== expected_sample_mem[receive_index]) begin
        $fatal(1, "predict_sample mismatch at %0d", receive_index);
      end

      if (receive_index == 0 && predict_strip_first_pixel !== 1'b1) begin
        $fatal(1, "first event should preserve strip_first_pixel");
      end

      if (receive_index == 3 && predict_strip_last_pixel !== 1'b1) begin
        $fatal(1, "last event should preserve strip_last_pixel");
      end

      receive_index <= receive_index + 1;
    end
  end

  // Watchdog: abort if the scenario has not finished within 2000 clocks.
  initial begin
    repeat (2000) @(posedge clk);
    $fatal(1, "Timeout waiting for predictor smoke");
  end

  // PASS reporter: wait a few extra clocks so a spurious extra event would
  // still be caught by the scoreboard.
  initial begin
    wait (all_done_seen);
    repeat (4) @(posedge clk);
    $display("PASS: tb_jls_predictor");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
172
fpga/sim/tb_jls_preset_defaults.sv
Normal file
172
fpga/sim/tb_jls_preset_defaults.sv
Normal file
@@ -0,0 +1,172 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.2.4.1.1 preset coding parameters
|
||||
// Figure : C.3 clamping function, referenced by default threshold rules
|
||||
// Table : Table C.1 valid preset parameters, Table C.2 RESET, Table C.3 defaults
|
||||
// Pseudocode : Default threshold calculation for MAXVAL >= 128
|
||||
// Example : Checks 8/10/12/14/16-bit defaults and NEAR clamp behavior.
|
||||
//
|
||||
// Smoke test for jls_preset_defaults.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_preset_defaults;

  // Self-checking smoke test for jls_preset_defaults.
  //
  // Six DUT instances are built, one per (PIX_WIDTH, NEAR) pair. Each gets a
  // constant NEAR input, and after a short delay its preset outputs are
  // compared against fixed expected values. The first mismatch ends the
  // simulation through $fatal; otherwise a PASS line is printed.

  // NEAR stimulus, one signal per instance (suffix = pixel width _ NEAR value).
  logic [5:0] near_8b_0;
  logic [5:0] near_8b_31;
  logic [5:0] near_10b_4;
  logic [5:0] near_12b_1;
  logic [5:0] near_14b_2;
  logic [5:0] near_16b_63;

  // Preset outputs, grouped per instance: MAXVAL, T1, T2, T3, RESET.
  logic [15:0] maxval_8b_0,   t1_8b_0,   t2_8b_0,   t3_8b_0,   reset_8b_0;
  logic [15:0] maxval_8b_31,  t1_8b_31,  t2_8b_31,  t3_8b_31,  reset_8b_31;
  logic [15:0] maxval_10b_4,  t1_10b_4,  t2_10b_4,  t3_10b_4,  reset_10b_4;
  logic [15:0] maxval_12b_1,  t1_12b_1,  t2_12b_1,  t3_12b_1,  reset_12b_1;
  logic [15:0] maxval_14b_2,  t1_14b_2,  t2_14b_2,  t3_14b_2,  reset_14b_2;
  logic [15:0] maxval_16b_63, t1_16b_63, t2_16b_63, t3_16b_63, reset_16b_63;

  jls_preset_defaults #(.PIX_WIDTH(8)) dut_8b_0 (
    .near         (near_8b_0),
    .preset_maxval(maxval_8b_0),
    .preset_t1    (t1_8b_0),
    .preset_t2    (t2_8b_0),
    .preset_t3    (t3_8b_0),
    .preset_reset (reset_8b_0)
  );

  jls_preset_defaults #(.PIX_WIDTH(8)) dut_8b_31 (
    .near         (near_8b_31),
    .preset_maxval(maxval_8b_31),
    .preset_t1    (t1_8b_31),
    .preset_t2    (t2_8b_31),
    .preset_t3    (t3_8b_31),
    .preset_reset (reset_8b_31)
  );

  jls_preset_defaults #(.PIX_WIDTH(10)) dut_10b_4 (
    .near         (near_10b_4),
    .preset_maxval(maxval_10b_4),
    .preset_t1    (t1_10b_4),
    .preset_t2    (t2_10b_4),
    .preset_t3    (t3_10b_4),
    .preset_reset (reset_10b_4)
  );

  jls_preset_defaults #(.PIX_WIDTH(12)) dut_12b_1 (
    .near         (near_12b_1),
    .preset_maxval(maxval_12b_1),
    .preset_t1    (t1_12b_1),
    .preset_t2    (t2_12b_1),
    .preset_t3    (t3_12b_1),
    .preset_reset (reset_12b_1)
  );

  jls_preset_defaults #(.PIX_WIDTH(14)) dut_14b_2 (
    .near         (near_14b_2),
    .preset_maxval(maxval_14b_2),
    .preset_t1    (t1_14b_2),
    .preset_t2    (t2_14b_2),
    .preset_t3    (t3_14b_2),
    .preset_reset (reset_14b_2)
  );

  jls_preset_defaults #(.PIX_WIDTH(16)) dut_16b_63 (
    .near         (near_16b_63),
    .preset_maxval(maxval_16b_63),
    .preset_t1    (t1_16b_63),
    .preset_t2    (t2_16b_63),
    .preset_t3    (t3_16b_63),
    .preset_reset (reset_16b_63)
  );

  // Compares one instance's observed presets with the expected values and
  // aborts the simulation with failure_text on any difference.
  //   failure_text : message passed to $fatal
  //   got_*        : values observed on the DUT outputs
  //   want_*       : expected MAXVAL/T1/T2/T3 (RESET is expected to be 64 in
  //                  every case of this test)
  // The comparison uses !== so an X or Z on any output also counts as a
  // mismatch.
  task automatic expect_presets(
    input string       failure_text,
    input logic [15:0] got_maxval,
    input logic [15:0] got_t1,
    input logic [15:0] got_t2,
    input logic [15:0] got_t3,
    input logic [15:0] got_reset,
    input logic [15:0] want_maxval,
    input logic [15:0] want_t1,
    input logic [15:0] want_t2,
    input logic [15:0] want_t3
  );
    if (got_maxval !== want_maxval ||
        got_t1     !== want_t1     ||
        got_t2     !== want_t2     ||
        got_t3     !== want_t3     ||
        got_reset  !== 16'd64) begin
      $fatal(1, "%s", failure_text);
    end
  endtask

  initial begin
    near_8b_0   = 6'd0;
    near_8b_31  = 6'd31;
    near_10b_4  = 6'd4;
    near_12b_1  = 6'd1;
    near_14b_2  = 6'd2;
    near_16b_63 = 6'd63;

    // Give the DUT outputs one time unit to respond before sampling them
    // (the DUT has no clock port in this test).
    #1;

    expect_presets("8-bit NEAR=0 defaults mismatch",
                   maxval_8b_0, t1_8b_0, t2_8b_0, t3_8b_0, reset_8b_0,
                   16'd255, 16'd3, 16'd7, 16'd21);

    expect_presets("8-bit NEAR=31 defaults mismatch",
                   maxval_8b_31, t1_8b_31, t2_8b_31, t3_8b_31, reset_8b_31,
                   16'd255, 16'd96, 16'd162, 16'd238);

    expect_presets("10-bit NEAR=4 defaults mismatch",
                   maxval_10b_4, t1_10b_4, t2_10b_4, t3_10b_4, reset_10b_4,
                   16'd1023, 16'd18, 16'd39, 16'd100);

    expect_presets("12-bit NEAR=1 defaults mismatch",
                   maxval_12b_1, t1_12b_1, t2_12b_1, t3_12b_1, reset_12b_1,
                   16'd4095, 16'd21, 16'd72, 16'd283);

    expect_presets("14-bit NEAR=2 defaults mismatch",
                   maxval_14b_2, t1_14b_2, t2_14b_2, t3_14b_2, reset_14b_2,
                   16'd16383, 16'd24, 16'd77, 16'd290);

    // NEAR input 63; the failure message labels this the NEAR clamp case.
    expect_presets("16-bit NEAR clamp defaults mismatch",
                   maxval_16b_63, t1_16b_63, t2_16b_63, t3_16b_63, reset_16b_63,
                   16'hFFFF, 16'd111, 16'd222, 16'd493);

    $display("PASS: tb_jls_preset_defaults");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
296
fpga/sim/tb_jls_regular_error_quantizer.sv
Normal file
296
fpga/sim/tb_jls_regular_error_quantizer.sv
Normal file
@@ -0,0 +1,296 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 prediction error encoding, Annex A.2 RANGE
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Errval quantization/modulo and reconstructed sample computation
|
||||
// Example : Checks lossless, NEAR=1, context sign, and modulo reconstruction.
|
||||
//
|
||||
// Smoke test for jls_regular_error_quantizer.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_regular_error_quantizer;

  // Self-checking smoke test for jls_regular_error_quantizer.
  //
  // A single stimulus process pushes six corrected-prediction events through
  // the DUT's valid/ready input. A clocked scoreboard compares every accepted
  // output event (Errval, reconstructed sample, context index, strip flags)
  // against a fixed expectation table. A watchdog process aborts the run if
  // the expected number of events never arrives.

  // Test precision.
  localparam int PIX_WIDTH = 8;

  // Number of output events the scoreboard expects.
  localparam int EXPECTED_EVENT_COUNT = 6;

  // Main simulation clock and reset.
  logic clk;
  logic rst;

  // Corrected prediction input.
  logic                 corrected_valid;
  logic                 corrected_ready;
  logic [PIX_WIDTH-1:0] corrected_sample;
  logic [12:0]          corrected_x;
  logic [12:0]          corrected_y;
  logic                 corrected_strip_first_pixel;
  logic                 corrected_strip_last_pixel;
  logic [PIX_WIDTH-1:0] corrected_Px;
  logic [8:0]           corrected_context_index;
  logic                 corrected_context_negative;
  logic                 corrected_run_mode_context;
  logic [31:0]          corrected_A;
  logic signed [31:0]   corrected_B;
  logic signed [8:0]    corrected_C;
  logic [15:0]          corrected_N;
  logic [16:0]          RANGE;
  logic [4:0]           qbpp;
  logic [6:0]           LIMIT;
  logic [5:0]           NEAR;

  // Quantized error output.
  logic                 err_valid;
  logic                 err_ready;
  logic signed [31:0]   Errval;
  logic [PIX_WIDTH-1:0] reconstructed_sample;
  logic [12:0]          err_x;
  logic [12:0]          err_y;
  logic                 err_strip_first_pixel;
  logic                 err_strip_last_pixel;
  logic [8:0]           err_context_index;
  logic                 err_context_negative;
  logic                 err_run_mode_context;
  logic [4:0]           err_qbpp;
  logic [6:0]           err_LIMIT;
  logic [31:0]          err_A;
  logic signed [31:0]   err_B;
  logic signed [8:0]    err_C;
  logic [15:0]          err_N;

  // Scoreboard state.
  logic signed [31:0]   expected_err_mem [0:EXPECTED_EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0] expected_rx_mem  [0:EXPECTED_EVENT_COUNT-1];
  int                   receive_index;
  logic                 all_done_seen;

  jls_regular_error_quantizer #(
    .PIX_WIDTH(PIX_WIDTH)
  ) dut (
    .clk                        (clk),
    .rst                        (rst),
    .corrected_valid            (corrected_valid),
    .corrected_ready            (corrected_ready),
    .corrected_sample           (corrected_sample),
    .corrected_x                (corrected_x),
    .corrected_y                (corrected_y),
    .corrected_strip_first_pixel(corrected_strip_first_pixel),
    .corrected_strip_last_pixel (corrected_strip_last_pixel),
    .corrected_Px               (corrected_Px),
    .corrected_context_index    (corrected_context_index),
    .corrected_context_negative (corrected_context_negative),
    .corrected_run_mode_context (corrected_run_mode_context),
    .corrected_A                (corrected_A),
    .corrected_B                (corrected_B),
    .corrected_C                (corrected_C),
    .corrected_N                (corrected_N),
    .RANGE                      (RANGE),
    .qbpp                       (qbpp),
    .LIMIT                      (LIMIT),
    .NEAR                       (NEAR),
    .err_valid                  (err_valid),
    .err_ready                  (err_ready),
    .Errval                     (Errval),
    .reconstructed_sample       (reconstructed_sample),
    .err_x                      (err_x),
    .err_y                      (err_y),
    .err_strip_first_pixel      (err_strip_first_pixel),
    .err_strip_last_pixel       (err_strip_last_pixel),
    .err_context_index          (err_context_index),
    .err_context_negative       (err_context_negative),
    .err_run_mode_context       (err_run_mode_context),
    .err_qbpp                   (err_qbpp),
    .err_LIMIT                  (err_LIMIT),
    .err_A                      (err_A),
    .err_B                      (err_B),
    .err_C                      (err_C),
    .err_N                      (err_N)
  );

  // Free-running clock, period 4 time units. clk gets its initial value from
  // the stimulus process at time 0.
  always begin
    #2 clk = ~clk;
  end

  // Expected (Errval, reconstructed sample) per output event, in arrival
  // order. Entry i corresponds to the i-th send_corrected() call below.
  initial begin
    expected_err_mem[0] = 32'sd4;
    expected_rx_mem[0]  = 8'd24;
    expected_err_mem[1] = 32'sd1;
    expected_rx_mem[1]  = 8'd23;
    expected_err_mem[2] = -32'sd1;
    expected_rx_mem[2]  = 8'd21;
    expected_err_mem[3] = -32'sd1;
    expected_rx_mem[3]  = 8'd23;
    expected_err_mem[4] = 32'sd1;
    expected_rx_mem[4]  = 8'd0;
    expected_err_mem[5] = -32'sd3;
    expected_rx_mem[5]  = 8'd189;
  end

  // Presents one corrected-prediction event to the DUT and holds it through
  // the rising clock edge that follows corrected_ready being high.
  //   sample / px          : corrected_sample and corrected_Px
  //   context_index        : corrected_context_index
  //   context_negative     : corrected_context_negative
  //   strip_first / _last  : strip boundary flags for this event
  //   range_value, qbpp_value, near_value : coding parameters for this event
  // LIMIT is driven to 32 for every event.
  //
  // The event is applied with blocking assignments on the falling edge, away
  // from the DUT's sampling edge. It is released with NONBLOCKING assignments
  // after the rising edge: a blocking write there would race with any
  // posedge-clocked process reading these signals, and whether the handshake
  // was seen would depend on simulator scheduling order.
  task automatic send_corrected(
    input logic [PIX_WIDTH-1:0] sample,
    input logic [PIX_WIDTH-1:0] px,
    input logic [8:0]           context_index,
    input logic                 context_negative,
    input logic                 strip_first,
    input logic                 strip_last,
    input logic [16:0]          range_value,
    input logic [4:0]           qbpp_value,
    input logic [5:0]           near_value
  );
    @(negedge clk);
    corrected_valid             = 1'b1;
    corrected_sample            = sample;
    corrected_Px                = px;
    corrected_context_index     = context_index;
    corrected_context_negative  = context_negative;
    corrected_strip_first_pixel = strip_first;
    corrected_strip_last_pixel  = strip_last;
    RANGE                       = range_value;
    qbpp                        = qbpp_value;
    LIMIT                       = 7'd32;
    NEAR                        = near_value;

    wait (corrected_ready);
    @(posedge clk);

    // Release valid and the one-shot flags after the sampling edge.
    corrected_valid             <= 1'b0;
    corrected_context_negative  <= 1'b0;
    corrected_strip_first_pixel <= 1'b0;
    corrected_strip_last_pixel  <= 1'b0;
  endtask

  // Stimulus process: reset, then six events, then wait for the scoreboard.
  initial begin
    clk                         = 1'b0;
    rst                         = 1'b1;
    corrected_valid             = 1'b0;
    corrected_sample            = 8'd0;
    corrected_x                 = 13'd0;
    corrected_y                 = 13'd0;
    corrected_strip_first_pixel = 1'b0;
    corrected_strip_last_pixel  = 1'b0;
    corrected_Px                = 8'd0;
    corrected_context_index     = 9'd0;
    corrected_context_negative  = 1'b0;
    corrected_run_mode_context  = 1'b0;
    corrected_A                 = 32'd4;
    corrected_B                 = 32'sd0;
    corrected_C                 = 9'sd0;
    corrected_N                 = 16'd1;
    RANGE                       = 17'd256;
    qbpp                        = 5'd8;
    LIMIT                       = 7'd32;
    NEAR                        = 6'd0;
    err_ready                   = 1'b1;
    all_done_seen               = 1'b0;

    repeat (5) @(posedge clk);
    // Nonblocking so every posedge process still sees rst high on this edge.
    rst <= 1'b0;

    // Event 0: NEAR=0, sample 24 vs Px 20, marked as first pixel of the strip.
    send_corrected(8'd24, 8'd20, 9'd5, 1'b0, 1'b1, 1'b0, 17'd256, 5'd8, 6'd0);

    // Event 1: NEAR=1 (RANGE=86), sample above Px.
    send_corrected(8'd24, 8'd20, 9'd6, 1'b0, 1'b0, 1'b0, 17'd86, 5'd7, 6'd1);

    // Event 2: NEAR=1, sample below Px.
    send_corrected(8'd20, 8'd24, 9'd7, 1'b0, 1'b0, 1'b0, 17'd86, 5'd7, 6'd1);

    // Event 3: NEAR=1 with the context-negative flag set.
    send_corrected(8'd24, 8'd20, 9'd8, 1'b1, 1'b0, 1'b0, 17'd86, 5'd7, 6'd1);

    // Event 4: NEAR=0, sample 0 vs Px 255, marked as last pixel of the strip.
    send_corrected(8'd0, 8'd255, 9'd9, 1'b0, 1'b0, 1'b1, 17'd256, 5'd8, 6'd0);

    // Event 5: NEAR=31 (RANGE=6), sample 200 vs Px 0.
    send_corrected(8'd200, 8'd0, 9'd10, 1'b0, 1'b0, 1'b0, 17'd6, 5'd3, 6'd31);

    wait (receive_index == EXPECTED_EVENT_COUNT);
    all_done_seen = 1'b1;
  end

  // Scoreboard: checks each accepted output event against the expectation
  // table and counts it. Any mismatch or surplus event is fatal.
  always_ff @(posedge clk) begin
    if (rst) begin
      receive_index <= 0;
    end else if (err_valid && err_ready) begin
      if (receive_index >= EXPECTED_EVENT_COUNT) begin
        $fatal(1, "Unexpected extra regular-error event");
      end

      if (Errval !== expected_err_mem[receive_index]) begin
        $fatal(1, "Errval mismatch at %0d: got %0d expected %0d",
               receive_index, Errval, expected_err_mem[receive_index]);
      end

      if (reconstructed_sample !== expected_rx_mem[receive_index]) begin
        $fatal(1, "reconstructed sample mismatch at %0d: got %0d expected %0d",
               receive_index, reconstructed_sample, expected_rx_mem[receive_index]);
      end

      // Stimulus uses context indices 5, 6, ... in event order.
      if (err_context_index !== (9'd5 + receive_index[8:0])) begin
        $fatal(1, "err_context_index mismatch at %0d", receive_index);
      end

      if (receive_index == 0 && err_strip_first_pixel !== 1'b1) begin
        $fatal(1, "first event should preserve strip_first_pixel");
      end

      // Event 4 is the one sent with the strip-last flag set.
      if (receive_index == 4 && err_strip_last_pixel !== 1'b1) begin
        $fatal(1, "last event should preserve strip_last_pixel");
      end

      receive_index <= receive_index + 1;
    end
  end

  // Watchdog: abort if the test has not finished within 5000 clock cycles.
  initial begin
    repeat (5000) @(posedge clk);
    $fatal(1, "Timeout waiting for regular error quantizer smoke");
  end

  // Completion: after all events are seen, run four more cycles so a surplus
  // event can still be caught by the scoreboard, then report PASS.
  initial begin
    wait (all_done_seen);
    repeat (4) @(posedge clk);
    $display("PASS: tb_jls_regular_error_quantizer");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
419
fpga/sim/tb_jls_run_mode.sv
Normal file
419
fpga/sim/tb_jls_run_mode.sv
Normal file
@@ -0,0 +1,419 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.7 run mode, Annex A.5 Golomb coding
|
||||
// Figure : N/A
|
||||
// Table : Annex A RUNindex/J table
|
||||
// Pseudocode : run-length bits, RItype, map, MErrval, and RI context update
|
||||
// Example : A zero-length run at RUNindex=0 emits one zero bit before
|
||||
// run-interruption MErrval.
|
||||
//
|
||||
// Smoke test for jls_run_mode. It checks:
|
||||
// 1. zero-length RItype=1 interruption,
|
||||
// 2. zero-length RItype=0 interruption with negative Errval mapping,
|
||||
// 3. end-of-line run chunks that advance RUNindex without interruption,
|
||||
// 4. NEAR=31 run-interruption reciprocal-division path.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_run_mode;

  // Self-checking smoke test for jls_run_mode.
  //
  // A single stimulus process initialises the strip parameters, sends four
  // run segments (re-initialising with NEAR=31 before the last one), and
  // waits for each segment_done pulse. A clocked scoreboard checks the three
  // DUT output streams (direct run code bits, mapped interruption values,
  // reconstructed interruption samples) against fixed expectation tables,
  // and a final process verifies the event counts before printing PASS.

  // Test precision and direct code-event width.
  localparam int PIX_WIDTH     = 8;
  localparam int MAX_CODE_BITS = 64;

  // Expected output counts.
  localparam int EXPECTED_CODE_EVENT_COUNT   = 6;
  localparam int EXPECTED_MAPPED_EVENT_COUNT = 3;
  localparam int EXPECTED_RECON_EVENT_COUNT  = 3;
  localparam int EXPECTED_DONE_COUNT         = 4;

  // Main simulation clock and reset.
  logic clk;
  logic rst;

  // Strip coding parameters.
  logic        strip_init_valid;
  logic        strip_init_ready;
  logic [16:0] RANGE;
  logic [4:0]  qbpp;
  logic [6:0]  LIMIT;
  logic [5:0]  NEAR;
  logic [15:0] RESET;

  // Run-segment input.
  logic                 segment_valid;
  logic                 segment_ready;
  logic [12:0]          run_length;
  logic                 run_end_of_line;
  logic                 interruption_valid;
  logic [PIX_WIDTH-1:0] interruption_sample;
  logic [12:0]          interruption_x;
  logic [12:0]          interruption_y;
  logic                 interruption_strip_first_pixel;
  logic                 interruption_strip_last_pixel;
  logic [PIX_WIDTH-1:0] Ra;
  logic [PIX_WIDTH-1:0] Rb;

  // Direct run-code output.
  logic                     run_code_valid;
  logic                     run_code_ready;
  logic [MAX_CODE_BITS-1:0] run_code_bits;
  logic [6:0]               run_code_bit_count;

  // Mapped interruption output.
  logic        mapped_valid;
  logic        mapped_ready;
  logic [31:0] MErrval;
  logic [4:0]  k;
  logic [6:0]  limit;
  logic [4:0]  mapped_qbpp;
  logic        mapped_strip_last_pixel;

  // Reconstructed interruption output.
  logic                 recon_valid;
  logic                 recon_ready;
  logic [PIX_WIDTH-1:0] reconstructed_sample;
  logic [12:0]          recon_x;
  logic [12:0]          recon_y;
  logic                 recon_strip_first_pixel;
  logic                 recon_strip_last_pixel;

  // Segment completion.
  logic segment_done;
  logic segment_last_done;

  // Scoreboard state.
  logic [MAX_CODE_BITS-1:0] expected_code_bits_mem  [0:EXPECTED_CODE_EVENT_COUNT-1];
  logic [6:0]               expected_code_count_mem [0:EXPECTED_CODE_EVENT_COUNT-1];
  logic [31:0]              expected_MErrval_mem    [0:EXPECTED_MAPPED_EVENT_COUNT-1];
  logic [4:0]               expected_k_mem          [0:EXPECTED_MAPPED_EVENT_COUNT-1];
  logic [6:0]               expected_limit_mem      [0:EXPECTED_MAPPED_EVENT_COUNT-1];
  logic [4:0]               expected_qbpp_mem       [0:EXPECTED_MAPPED_EVENT_COUNT-1];
  logic [PIX_WIDTH-1:0]     expected_rx_mem         [0:EXPECTED_RECON_EVENT_COUNT-1];
  int                       code_index;
  int                       mapped_index;
  int                       recon_index;
  int                       done_count;
  logic                     all_done_seen;

  jls_run_mode #(
    .PIX_WIDTH    (PIX_WIDTH),
    .MAX_CODE_BITS(MAX_CODE_BITS)
  ) dut (
    .clk                           (clk),
    .rst                           (rst),
    .strip_init_valid              (strip_init_valid),
    .strip_init_ready              (strip_init_ready),
    .RANGE                         (RANGE),
    .qbpp                          (qbpp),
    .LIMIT                         (LIMIT),
    .NEAR                          (NEAR),
    .RESET                         (RESET),
    .segment_valid                 (segment_valid),
    .segment_ready                 (segment_ready),
    .run_length                    (run_length),
    .run_end_of_line               (run_end_of_line),
    .interruption_valid            (interruption_valid),
    .interruption_sample           (interruption_sample),
    .interruption_x                (interruption_x),
    .interruption_y                (interruption_y),
    .interruption_strip_first_pixel(interruption_strip_first_pixel),
    .interruption_strip_last_pixel (interruption_strip_last_pixel),
    .Ra                            (Ra),
    .Rb                            (Rb),
    .run_code_valid                (run_code_valid),
    .run_code_ready                (run_code_ready),
    .run_code_bits                 (run_code_bits),
    .run_code_bit_count            (run_code_bit_count),
    .mapped_valid                  (mapped_valid),
    .mapped_ready                  (mapped_ready),
    .MErrval                       (MErrval),
    .k                             (k),
    .limit                         (limit),
    .mapped_qbpp                   (mapped_qbpp),
    .mapped_strip_last_pixel       (mapped_strip_last_pixel),
    .recon_valid                   (recon_valid),
    .recon_ready                   (recon_ready),
    .reconstructed_sample          (reconstructed_sample),
    .recon_x                       (recon_x),
    .recon_y                       (recon_y),
    .recon_strip_first_pixel       (recon_strip_first_pixel),
    .recon_strip_last_pixel        (recon_strip_last_pixel),
    .segment_done                  (segment_done),
    .segment_last_done             (segment_last_done)
  );

  // Free-running clock, period 4 time units. clk gets its initial value from
  // the stimulus process at time 0.
  always begin
    #2 clk = ~clk;
  end

  // Expectation tables, indexed by arrival order on each output stream.
  initial begin
    // Segment 0: run_length=0 at RUNindex=0 emits one zero bit.
    expected_code_bits_mem[0]  = 64'h0000_0000_0000_0000;
    expected_code_count_mem[0] = 7'd1;

    // Segment 1: another zero-length run, still RUNindex=0.
    expected_code_bits_mem[1]  = 64'h0000_0000_0000_0000;
    expected_code_count_mem[1] = 7'd1;

    // Segment 2: EOL run_length=3 starts at RUNindex=0 and emits three full
    // run chunks because J[0],J[1],J[2] are all zero.
    expected_code_bits_mem[2]  = 64'h8000_0000_0000_0000;
    expected_code_count_mem[2] = 7'd1;
    expected_code_bits_mem[3]  = 64'h8000_0000_0000_0000;
    expected_code_count_mem[3] = 7'd1;
    expected_code_bits_mem[4]  = 64'h8000_0000_0000_0000;
    expected_code_count_mem[4] = 7'd1;

    // Segment 3: NEAR=31 zero-length RItype=1 interruption.
    expected_code_bits_mem[5]  = 64'h0000_0000_0000_0000;
    expected_code_count_mem[5] = 7'd1;

    // RItype=1: A=4,N=1 => k=2, Errval=3, map=0, MErrval=5.
    expected_MErrval_mem[0] = 32'd5;
    expected_k_mem[0]       = 5'd2;
    expected_limit_mem[0]   = 7'd31;
    expected_qbpp_mem[0]    = 5'd8;
    expected_rx_mem[0]      = 8'd13;

    // RItype=0: A=4,N=1 => k=2, Errval=-2, map=1, MErrval=3.
    expected_MErrval_mem[1] = 32'd3;
    expected_k_mem[1]       = 5'd2;
    expected_limit_mem[1]   = 7'd31;
    expected_qbpp_mem[1]    = 5'd8;
    expected_rx_mem[1]      = 8'd7;

    // NEAR=31, RANGE=6: Errval=3 is modulo-mapped to -3, RItype=1 and
    // run_map=1 produce MErrval=4; reconstruction wraps to 189.
    expected_MErrval_mem[2] = 32'd4;
    expected_k_mem[2]       = 5'd1;
    expected_limit_mem[2]   = 7'd31;
    expected_qbpp_mem[2]    = 5'd3;
    expected_rx_mem[2]      = 8'd189;
  end

  // Drives one strip-init handshake with the given coding parameters.
  //   range_value, qbpp_value, limit_value, near_value : values placed on
  //   RANGE / qbpp / LIMIT / NEAR while strip_init_valid is high.
  // Parameters and valid are applied on the falling edge. Valid is released
  // with a nonblocking assignment after the rising edge so that posedge
  // processes reading it are guaranteed to see it high on that edge; a
  // blocking write there would race with them.
  task automatic init_strip(
    input logic [16:0] range_value,
    input logic [4:0]  qbpp_value,
    input logic [6:0]  limit_value,
    input logic [5:0]  near_value
  );
    @(negedge clk);
    RANGE            = range_value;
    qbpp             = qbpp_value;
    LIMIT            = limit_value;
    NEAR             = near_value;
    strip_init_valid = 1'b1;

    wait (strip_init_ready);
    @(posedge clk);
    strip_init_valid <= 1'b0;
  endtask

  // Presents one run segment and holds it through the rising clock edge that
  // follows segment_ready being high.
  //   length / end_of_line  : run_length and run_end_of_line
  //   has_interruption      : interruption_valid
  //   sample, x, y          : interruption sample and its coordinates
  //   strip_first / _last   : strip boundary flags for this segment
  //   ra_value / rb_value   : Ra and Rb neighbourhood samples
  // segment_valid and the one-shot strip flags are released with nonblocking
  // assignments after the sampling edge, for the same race-avoidance reason
  // as in init_strip. The remaining fields keep their values until the next
  // segment overwrites them.
  task automatic send_segment(
    input logic [12:0]          length,
    input logic                 end_of_line,
    input logic                 has_interruption,
    input logic [PIX_WIDTH-1:0] sample,
    input logic [12:0]          x,
    input logic [12:0]          y,
    input logic                 strip_first,
    input logic                 strip_last,
    input logic [PIX_WIDTH-1:0] ra_value,
    input logic [PIX_WIDTH-1:0] rb_value
  );
    @(negedge clk);
    segment_valid                  = 1'b1;
    run_length                     = length;
    run_end_of_line                = end_of_line;
    interruption_valid             = has_interruption;
    interruption_sample            = sample;
    interruption_x                 = x;
    interruption_y                 = y;
    interruption_strip_first_pixel = strip_first;
    interruption_strip_last_pixel  = strip_last;
    Ra                             = ra_value;
    Rb                             = rb_value;

    wait (segment_ready);
    @(posedge clk);
    segment_valid                  <= 1'b0;
    interruption_strip_first_pixel <= 1'b0;
    interruption_strip_last_pixel  <= 1'b0;
  endtask

  // Stimulus process.
  initial begin
    clk                            = 1'b0;
    rst                            = 1'b1;
    strip_init_valid               = 1'b0;
    RANGE                          = 17'd256;
    qbpp                           = 5'd8;
    LIMIT                          = 7'd32;
    NEAR                           = 6'd0;
    RESET                          = 16'd64;
    segment_valid                  = 1'b0;
    run_length                     = 13'd0;
    run_end_of_line                = 1'b0;
    interruption_valid             = 1'b0;
    interruption_sample            = 8'd0;
    interruption_x                 = 13'd0;
    interruption_y                 = 13'd0;
    interruption_strip_first_pixel = 1'b0;
    interruption_strip_last_pixel  = 1'b0;
    Ra                             = 8'd0;
    Rb                             = 8'd0;
    run_code_ready                 = 1'b1;
    mapped_ready                   = 1'b1;
    recon_ready                    = 1'b1;
    all_done_seen                  = 1'b0;

    repeat (5) @(posedge clk);
    // Nonblocking so every posedge process still sees rst high on this edge.
    rst <= 1'b0;

    // First strip: NEAR=0 parameters (same values as the reset defaults above).
    init_strip(17'd256, 5'd8, 7'd32, 6'd0);

    // Zero-length RItype=1 interruption: |Ra-Rb| <= NEAR.
    send_segment(13'd0, 1'b0, 1'b1, 8'd13, 13'd4, 13'd1, 1'b1, 1'b0, 8'd10, 8'd10);
    wait (done_count == 1);

    // Zero-length RItype=0 interruption: sign is positive because Rb > Ra.
    send_segment(13'd0, 1'b0, 1'b1, 8'd7, 13'd8, 13'd1, 1'b0, 1'b0, 8'd5, 8'd9);
    wait (done_count == 2);

    // EOL run_length=3 with no interruption.
    send_segment(13'd3, 1'b1, 1'b0, 8'd0, 13'd15, 13'd1, 1'b0, 1'b1, 8'd22, 8'd22);
    wait (done_count == 3);

    // Second strip: switch to NEAR=31 / RANGE=6 before the last segment.
    init_strip(17'd6, 5'd3, 7'd32, 6'd31);

    // NEAR=31 zero-length RItype=1 interruption exercises the reciprocal LUT.
    send_segment(13'd0, 1'b0, 1'b1, 8'd200, 13'd2, 13'd2, 1'b0, 1'b1, 8'd0, 8'd0);
    wait (done_count == EXPECTED_DONE_COUNT);

    all_done_seen = 1'b1;
  end

  // Scoreboard: one independent checker per output stream plus a
  // segment_done counter. Any mismatch or surplus event is fatal.
  always_ff @(posedge clk) begin
    if (rst) begin
      code_index   <= 0;
      mapped_index <= 0;
      recon_index  <= 0;
      done_count   <= 0;
    end else begin
      // Direct run-code events.
      if (run_code_valid && run_code_ready) begin
        if (code_index >= EXPECTED_CODE_EVENT_COUNT) begin
          $fatal(1, "Unexpected extra run code event");
        end

        if (run_code_bit_count !== expected_code_count_mem[code_index]) begin
          $fatal(1, "run code count mismatch at %0d: got %0d expected %0d",
                 code_index, run_code_bit_count, expected_code_count_mem[code_index]);
        end

        if (run_code_bits !== expected_code_bits_mem[code_index]) begin
          $fatal(1, "run code bits mismatch at %0d: got 0x%016h expected 0x%016h",
                 code_index, run_code_bits, expected_code_bits_mem[code_index]);
        end

        code_index <= code_index + 1;
      end

      // Mapped run-interruption events.
      if (mapped_valid && mapped_ready) begin
        if (mapped_index >= EXPECTED_MAPPED_EVENT_COUNT) begin
          $fatal(1, "Unexpected extra mapped run-interruption event");
        end

        if (MErrval !== expected_MErrval_mem[mapped_index]) begin
          $fatal(1, "MErrval mismatch at %0d: got %0d expected %0d",
                 mapped_index, MErrval, expected_MErrval_mem[mapped_index]);
        end

        if (k !== expected_k_mem[mapped_index]) begin
          $fatal(1, "k mismatch at %0d: got %0d expected %0d",
                 mapped_index, k, expected_k_mem[mapped_index]);
        end

        if (limit !== expected_limit_mem[mapped_index]) begin
          $fatal(1, "limit mismatch at %0d: got %0d expected %0d",
                 mapped_index, limit, expected_limit_mem[mapped_index]);
        end

        if (mapped_qbpp !== expected_qbpp_mem[mapped_index]) begin
          $fatal(1, "mapped_qbpp mismatch at %0d", mapped_index);
        end

        mapped_index <= mapped_index + 1;
      end

      // Reconstructed run-interruption samples.
      if (recon_valid && recon_ready) begin
        if (recon_index >= EXPECTED_RECON_EVENT_COUNT) begin
          $fatal(1, "Unexpected extra reconstructed run-interruption event");
        end

        if (reconstructed_sample !== expected_rx_mem[recon_index]) begin
          $fatal(1, "reconstructed sample mismatch at %0d: got %0d expected %0d",
                 recon_index, reconstructed_sample, expected_rx_mem[recon_index]);
        end

        if (recon_index == 0 && recon_strip_first_pixel !== 1'b1) begin
          $fatal(1, "first run-interruption should preserve strip_first_pixel");
        end

        recon_index <= recon_index + 1;
      end

      // Segment completion pulses. done_count still holds the pre-increment
      // value here, so 2 identifies the third segment (the one sent with the
      // strip-last flag and end-of-line set).
      if (segment_done) begin
        done_count <= done_count + 1;

        if (done_count == 2 && segment_last_done !== 1'b1) begin
          $fatal(1, "third segment should report segment_last_done");
        end
      end
    end
  end

  // Watchdog: abort if the test has not finished within 5000 clock cycles.
  initial begin
    repeat (5000) @(posedge clk);
    $fatal(1, "Timeout waiting for run-mode smoke");
  end

  // Completion: after the last segment_done, run four more cycles so late or
  // surplus events can still be caught, then verify the per-stream counts.
  initial begin
    wait (all_done_seen);
    repeat (4) @(posedge clk);

    if (code_index !== EXPECTED_CODE_EVENT_COUNT) begin
      $fatal(1, "run code count mismatch: got %0d", code_index);
    end

    if (mapped_index !== EXPECTED_MAPPED_EVENT_COUNT) begin
      $fatal(1, "mapped count mismatch: got %0d", mapped_index);
    end

    if (recon_index !== EXPECTED_RECON_EVENT_COUNT) begin
      $fatal(1, "recon count mismatch: got %0d", recon_index);
    end

    $display("PASS: tb_jls_run_mode");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
219
fpga/sim/tb_jls_scan_ctrl.sv
Normal file
219
fpga/sim/tb_jls_scan_ctrl.sv
Normal file
@@ -0,0 +1,219 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.8 control procedure, Annex D.1-D.3 scan control
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Start one JPEG-LS scan per standalone strip frame
|
||||
// Example : Four-pixel smoke strip emits one start and one finish event.
|
||||
//
|
||||
// Smoke test for jls_scan_ctrl.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jls_scan_ctrl;
|
||||
|
||||
// Test precision and strip height.
|
||||
localparam int PIX_WIDTH = 8;
|
||||
localparam int SCAN_ROWS = 4;
|
||||
localparam int TEST_PIXEL_COUNT = 4;
|
||||
|
||||
// Main simulation clock and reset.
|
||||
logic clk;
|
||||
logic rst;
|
||||
|
||||
// Input pixel event.
|
||||
logic pixel_valid;
|
||||
logic pixel_ready;
|
||||
logic [PIX_WIDTH-1:0] pixel_sample;
|
||||
logic [12:0] pixel_x;
|
||||
logic [12:0] pixel_y;
|
||||
logic strip_first_pixel;
|
||||
logic strip_last_pixel;
|
||||
logic image_first_pixel;
|
||||
logic image_last_pixel;
|
||||
logic [12:0] active_pic_col;
|
||||
logic [3:0] active_ratio;
|
||||
logic [5:0] current_near;
|
||||
|
||||
// Forwarded encode pixel event.
|
||||
logic enc_pixel_valid;
|
||||
logic enc_pixel_ready;
|
||||
logic [PIX_WIDTH-1:0] enc_pixel_sample;
|
||||
logic [12:0] enc_pixel_x;
|
||||
logic [12:0] enc_pixel_y;
|
||||
logic enc_row_last_pixel;
|
||||
logic enc_strip_first_pixel;
|
||||
logic enc_strip_last_pixel;
|
||||
|
||||
// Strip control events.
|
||||
logic strip_start_valid;
|
||||
logic strip_start_ready;
|
||||
logic original_image_first_strip;
|
||||
logic [12:0] strip_width;
|
||||
logic [12:0] strip_height;
|
||||
logic [5:0] strip_near;
|
||||
logic strip_finish_valid;
|
||||
logic strip_finish_ready;
|
||||
logic original_image_last_strip;
|
||||
logic [31:0] strip_pixel_count;
|
||||
logic near_image_start_valid;
|
||||
logic [3:0] near_image_ratio;
|
||||
|
||||
// Scoreboard state.
|
||||
int send_index;
|
||||
int receive_index;
|
||||
int start_count;
|
||||
int finish_count;
|
||||
logic done_seen;
|
||||
|
||||
jls_scan_ctrl #(
|
||||
.PIX_WIDTH(PIX_WIDTH),
|
||||
.SCAN_ROWS(SCAN_ROWS)
|
||||
) dut (
|
||||
.clk(clk),
|
||||
.rst(rst),
|
||||
.pixel_valid(pixel_valid),
|
||||
.pixel_ready(pixel_ready),
|
||||
.pixel_sample(pixel_sample),
|
||||
.pixel_x(pixel_x),
|
||||
.pixel_y(pixel_y),
|
||||
.strip_first_pixel(strip_first_pixel),
|
||||
.strip_last_pixel(strip_last_pixel),
|
||||
.image_first_pixel(image_first_pixel),
|
||||
.image_last_pixel(image_last_pixel),
|
||||
.active_pic_col(active_pic_col),
|
||||
.active_ratio(active_ratio),
|
||||
.current_near(current_near),
|
||||
.enc_pixel_valid(enc_pixel_valid),
|
||||
.enc_pixel_ready(enc_pixel_ready),
|
||||
.enc_pixel_sample(enc_pixel_sample),
|
||||
.enc_pixel_x(enc_pixel_x),
|
||||
.enc_pixel_y(enc_pixel_y),
|
||||
.enc_row_last_pixel(enc_row_last_pixel),
|
||||
.enc_strip_first_pixel(enc_strip_first_pixel),
|
||||
.enc_strip_last_pixel(enc_strip_last_pixel),
|
||||
.strip_start_valid(strip_start_valid),
|
||||
.strip_start_ready(strip_start_ready),
|
||||
.original_image_first_strip(original_image_first_strip),
|
||||
.strip_width(strip_width),
|
||||
.strip_height(strip_height),
|
||||
.strip_near(strip_near),
|
||||
.strip_finish_valid(strip_finish_valid),
|
||||
.strip_finish_ready(strip_finish_ready),
|
||||
.original_image_last_strip(original_image_last_strip),
|
||||
.strip_pixel_count(strip_pixel_count),
|
||||
.near_image_start_valid(near_image_start_valid),
|
||||
.near_image_ratio(near_image_ratio)
|
||||
);
|
||||
|
||||
always begin
|
||||
#2 clk = ~clk;
|
||||
end
|
||||
|
||||
// Stimulus process: holds reset for five rising clock edges, then presents
// TEST_PIXEL_COUNT pixels back-to-back (one per rising edge, all on row 0)
// and finally returns the pixel interface to its idle values.
initial begin
  // Clock and reset start values.
  clk = 1'b0;
  rst = 1'b1;

  // Idle pixel interface.
  pixel_valid       = 1'b0;
  pixel_sample      = 8'h00;
  pixel_x           = 13'd0;
  pixel_y           = 13'd0;
  strip_first_pixel = 1'b0;
  strip_last_pixel  = 1'b0;
  image_first_pixel = 1'b0;
  image_last_pixel  = 1'b0;

  // Static configuration for this smoke.
  active_pic_col = 13'd4;
  active_ratio   = 4'd2;
  current_near   = 6'd7;

  // Downstream interfaces never apply back-pressure.
  enc_pixel_ready    = 1'b1;
  strip_start_ready  = 1'b1;
  strip_finish_ready = 1'b1;

  repeat (5) @(posedge clk);
  rst = 1'b0;

  // The single test row is both the whole strip and the whole image, so the
  // first/last flags of strip and image coincide.
  for (send_index = 0; send_index < TEST_PIXEL_COUNT; send_index++) begin
    @(posedge clk);
    pixel_valid       = 1'b1;
    pixel_sample      = send_index[7:0];
    pixel_x           = send_index[12:0];
    pixel_y           = 13'd0;
    strip_first_pixel = (send_index == 0);
    image_first_pixel = (send_index == 0);
    strip_last_pixel  = (send_index == (TEST_PIXEL_COUNT - 1));
    image_last_pixel  = (send_index == (TEST_PIXEL_COUNT - 1));
  end

  // Deassert the pixel interface one edge after the last pixel was presented.
  @(posedge clk);
  pixel_valid       = 1'b0;
  pixel_sample      = 8'h00;
  pixel_x           = 13'd0;
  strip_first_pixel = 1'b0;
  strip_last_pixel  = 1'b0;
  image_first_pixel = 1'b0;
  image_last_pixel  = 1'b0;
end
|
||||
|
||||
// Checker process: counts forwarded pixels and strip start/finish events and
// aborts the simulation as soon as any observed field differs from the value
// this smoke expects.
always_ff @(posedge clk) begin
  if (rst) begin
    done_seen     <= 1'b0;
    finish_count  <= 0;
    start_count   <= 0;
    receive_index <= 0;
  end else begin
    // The stimulus never waits for pixel_ready, so a stall is a failure.
    if (pixel_valid && !pixel_ready) begin
      $fatal(1, "pixel_ready unexpectedly low in scan smoke");
    end

    // Forwarded pixel: the sample must equal its ordinal number and the
    // row-last flag must be set only on the final pixel.
    if (enc_pixel_valid && enc_pixel_ready) begin
      receive_index <= receive_index + 1;
      if (enc_pixel_sample !== receive_index[7:0]) begin
        $fatal(1, "enc_pixel_sample mismatch at %0d", receive_index);
      end
      if (enc_row_last_pixel !== (receive_index == (TEST_PIXEL_COUNT - 1))) begin
        $fatal(1, "enc_row_last_pixel mismatch at %0d", receive_index);
      end
    end

    // Strip start event.
    if (strip_start_valid && strip_start_ready) begin
      start_count <= start_count + 1;
      if (original_image_first_strip !== 1'b1) begin
        $fatal(1, "original_image_first_strip should be high in this smoke");
      end
      if (!((strip_width === 13'd4) && (strip_height === 13'd4) &&
            (strip_near === 6'd0))) begin
        $fatal(1, "strip start fields mismatch");
      end
    end

    // Image start notification must carry the configured ratio.
    if (near_image_start_valid && (near_image_ratio !== 4'd2)) begin
      $fatal(1, "near_image_ratio mismatch");
    end

    // Strip finish event; also flags completion for the final check process.
    if (strip_finish_valid && strip_finish_ready) begin
      finish_count <= finish_count + 1;
      done_seen    <= 1'b1;
      if (!((original_image_last_strip === 1'b1) &&
            (strip_pixel_count === 32'd4))) begin
        $fatal(1, "strip finish fields mismatch");
      end
    end
  end
end
|
||||
|
||||
// Watchdog: abort if the test is still running after 1000 rising clock edges.
initial begin
  repeat (1000) begin
    @(posedge clk);
  end
  $fatal(1, "Timeout waiting for scan controller smoke");
end
|
||||
|
||||
// Final check: once the strip-finish event has been seen, allow four more
// clock edges for the counters to settle, verify the totals and report PASS.
initial begin
  wait (done_seen);
  repeat (4) @(posedge clk);

  if (!((receive_index === TEST_PIXEL_COUNT) &&
        (start_count === 1) &&
        (finish_count === 1))) begin
    $fatal(1, "scan smoke count mismatch");
  end

  $display("PASS: tb_jls_scan_ctrl");
  $finish;
end
|
||||
|
||||
endmodule
|
||||
|
||||
`default_nettype wire
|
||||
96
fpga/sim/tb_jpeg_ls_encoder_top_idle.sv
Normal file
96
fpga/sim/tb_jpeg_ls_encoder_top_idle.sv
Normal file
@@ -0,0 +1,96 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.8 control procedure, Annex C.1-C.4 marker stream syntax
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Top-level strip-frame encoder idle integration
|
||||
// Example : Empty input FIFO keeps the encoder idle and output quiet.
|
||||
//
|
||||
// Idle smoke test for jpeg_ls_encoder_top. This checks elaboration of the
|
||||
// integrated RTL without feeding pixels into the still-pending run-mode path.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jpeg_ls_encoder_top_idle;

  // Use 8-bit configuration for the fastest top-level smoke.
  localparam int PIX_WIDTH = 8;
  localparam int IFIFO_DATA_WIDTH = ((PIX_WIDTH + 7) / 8) * 9;

  // Number of rising clock edges after reset release during which both FIFO
  // interfaces must remain inactive.
  localparam int IDLE_CHECK_CYCLES = 100;

  // Main simulation clock and reset.
  logic clk;
  logic rst;

  // Top-level configuration and FIFO ports.
  logic [12:0] cfg_pic_col;
  logic [12:0] cfg_pic_row;
  logic [3:0] ratio;
  logic ififo_rclk;
  logic ififo_rd;
  logic [IFIFO_DATA_WIDTH-1:0] ififo_rdata;
  logic ififo_empty;
  logic ififo_alempty;
  logic ofifo_wclk;
  logic ofifo_wr;
  logic [8:0] ofifo_wdata;
  logic ofifo_full;
  logic ofifo_alfull;

  jpeg_ls_encoder_top #(
    .PIX_WIDTH(PIX_WIDTH),
    .DEFAULT_PIC_COL(16),
    .DEFAULT_PIC_ROW(16),
    .MAX_PIC_COL(64),
    .MAX_PIC_ROW(64),
    .SCAN_ROWS(16),
    .OUT_BUF_BYTES(256),
    .OUT_BUF_AFULL_MARGIN(16),
    .IFIFO_DATA_WIDTH(IFIFO_DATA_WIDTH)
  ) dut (
    .clk(clk),
    .rst(rst),
    .cfg_pic_col(cfg_pic_col),
    .cfg_pic_row(cfg_pic_row),
    .ratio(ratio),
    .ififo_rclk(ififo_rclk),
    .ififo_rd(ififo_rd),
    .ififo_rdata(ififo_rdata),
    .ififo_empty(ififo_empty),
    .ififo_alempty(ififo_alempty),
    .ofifo_wclk(ofifo_wclk),
    .ofifo_wr(ofifo_wr),
    .ofifo_wdata(ofifo_wdata),
    .ofifo_full(ofifo_full),
    .ofifo_alfull(ofifo_alfull)
  );

  // Free-running simulation clock: inverts clk every 2 time units.
  always begin
    #2 clk = ~clk;
  end

  // Test sequence: present a permanently empty input FIFO, release reset and
  // require that the encoder neither reads the input FIFO nor writes the
  // output FIFO at any point of the observation window.
  initial begin
    clk = 1'b0;
    rst = 1'b1;
    cfg_pic_col = 13'd16;
    cfg_pic_row = 13'd16;
    ratio = 4'd0;
    ififo_rdata = {IFIFO_DATA_WIDTH{1'b0}};
    ififo_empty = 1'b1;
    ififo_alempty = 1'b1;
    ofifo_full = 1'b0;
    ofifo_alfull = 1'b0;

    repeat (5) @(posedge clk);
    rst = 1'b0;

    // Sample on every clock edge rather than only at the end of the window,
    // so that a transient read/write pulse cannot slip through unnoticed.
    repeat (IDLE_CHECK_CYCLES) begin
      @(posedge clk);
      if (ififo_rd !== 1'b0 || ofifo_wr !== 1'b0) begin
        $fatal(1, "top idle smoke expected no FIFO activity");
      end
    end

    $display("PASS: tb_jpeg_ls_encoder_top_idle");
    $finish;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
473
fpga/sim/tb_jpeg_ls_encoder_top_run_smoke.sv
Normal file
473
fpga/sim/tb_jpeg_ls_encoder_top_run_smoke.sv
Normal file
@@ -0,0 +1,473 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.7 run mode, Annex C.1-C.4 marker stream syntax
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Small all-zero image exercises run-mode strip closure
|
||||
// Example : 16x16 zero image should produce SOI ... EOI and one original
|
||||
// image-start sideband byte.
|
||||
//
|
||||
// Non-empty top-level smoke. This is a tiny compatibility-oriented smoke for
|
||||
// the integrated path. Use plusargs:
|
||||
// +PATTERN=0 for all-zero run-mode heavy image,
|
||||
// +PATTERN=1 for row-major ramp regular-mode heavy image,
|
||||
// +PATTERN=2 for checkerboard,
|
||||
// +PATTERN=3 for a vertical edge,
|
||||
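// +PATTERN=4 for deterministic pseudo-noise/texture,
// +PATTERN=5 for an inverted row-major ramp,
// +PATTERN=6 for a diagonal edge (maximum sample value where x >= y),
// +PATTERN=7 for a coarse diagonal gradient ((x + y + image index) >> 3),
// +PATTERN=8 for vertical stripes selected by bit 3 of x,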
|
||||
// +PATTERN=9 to rotate ten representative patterns per image,
|
||||
// +IN_PGM=<path> to load a binary P5 PGM image instead of a synthetic pattern,
|
||||
// +RATIO=<0..15> to drive the top-level dynamic NEAR ratio port,
|
||||
// +OUT=<path> to choose the emitted .jls file,
|
||||
// +CASE=<name> to tag an optional CSV stats line,
|
||||
// +STATS=<path> to append a CSV stats line,
|
||||
// +CHECK_THROUGHPUT=1 to require at least 200 MPixel/s at 250 MHz,
// +TRACE_ENTROPY=1 to print TRACE lines for internal DUT handshakes.
|
||||
// The IMAGE_COUNT parameter repeats complete original images back-to-back for
|
||||
// a small continuous-input smoke. Each repeated image has its own SOF bit.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module tb_jpeg_ls_encoder_top_run_smoke;
|
||||
|
||||
// Default to 8-bit for the fastest top-level smoke; scripts can override
|
||||
// PIX_WIDTH to cover the other required grayscale precisions.
|
||||
parameter int PIX_WIDTH = 8;
|
||||
parameter int PIC_COL = 16;
|
||||
parameter int PIC_ROW = 16;
|
||||
parameter int SCAN_ROWS = 16;
|
||||
parameter int IMAGE_COUNT = 1;
|
||||
parameter int TIMEOUT_PER_PIXEL = 512;
|
||||
localparam int IFIFO_DATA_WIDTH = ((PIX_WIDTH + 7) / 8) * 9;
|
||||
localparam int SOF_BIT_INDEX = (PIX_WIDTH == 8) ? 8 : 17;
|
||||
localparam int PIXELS_PER_IMAGE = PIC_COL * PIC_ROW;
|
||||
localparam int FIFO_WORD_COUNT = PIXELS_PER_IMAGE * IMAGE_COUNT;
|
||||
localparam int PGM_BYTES_PER_SAMPLE = (PIX_WIDTH <= 8) ? 1 : 2;
|
||||
localparam int PGM_BYTES_PER_IMAGE = PIXELS_PER_IMAGE * PGM_BYTES_PER_SAMPLE;
|
||||
localparam int EXPECTED_FRAME_COUNT = (PIC_ROW / SCAN_ROWS) * IMAGE_COUNT;
|
||||
localparam longint SIM_TIMEOUT_CYCLES =
|
||||
(longint'(FIFO_WORD_COUNT) * TIMEOUT_PER_PIXEL) + 200000;
|
||||
localparam int SAMPLE_MAX_VALUE = (1 << PIX_WIDTH) - 1;
|
||||
|
||||
// Main simulation clock and reset.
|
||||
logic clk;
|
||||
logic rst;
|
||||
|
||||
// Top-level configuration and FIFO ports.
|
||||
logic [12:0] cfg_pic_col;
|
||||
logic [12:0] cfg_pic_row;
|
||||
logic [3:0] ratio;
|
||||
logic ififo_rclk;
|
||||
logic ififo_rd;
|
||||
logic [IFIFO_DATA_WIDTH-1:0] ififo_rdata;
|
||||
logic ififo_empty;
|
||||
logic ififo_alempty;
|
||||
logic ofifo_wclk;
|
||||
logic ofifo_wr;
|
||||
logic [8:0] ofifo_wdata;
|
||||
logic ofifo_full;
|
||||
logic ofifo_alfull;
|
||||
|
||||
// FIFO model and output scoreboard.
|
||||
logic [IFIFO_DATA_WIDTH-1:0] fifo_mem [0:FIFO_WORD_COUNT-1];
|
||||
byte unsigned pgm_payload [0:PGM_BYTES_PER_IMAGE-1];
|
||||
int fifo_rd_index;
|
||||
int init_index;
|
||||
int output_byte_count;
|
||||
int image_start_sideband_count;
|
||||
int jls_fd;
|
||||
int pattern_id;
|
||||
int ratio_id;
|
||||
int check_throughput;
|
||||
int trace_entropy;
|
||||
int image_pixel_index;
|
||||
int image_index;
|
||||
int pixel_x;
|
||||
int pixel_y;
|
||||
int pattern_select;
|
||||
int sample_value;
|
||||
int pgm_fd;
|
||||
int pgm_header_width;
|
||||
int pgm_header_height;
|
||||
int pgm_header_max_value;
|
||||
int pgm_bytes_read;
|
||||
int pgm_byte_index;
|
||||
int cycle_count;
|
||||
int input_read_count;
|
||||
int input_first_read_cycle;
|
||||
int input_last_read_cycle;
|
||||
longint input_active_cycles;
|
||||
longint throughput_mpix_x1000;
|
||||
longint throughput_left_side;
|
||||
longint throughput_right_side;
|
||||
int stats_fd;
|
||||
string jls_output_path;
|
||||
string stats_output_path;
|
||||
string case_id;
|
||||
logic first_output_seen;
|
||||
logic last_output_was_ff;
|
||||
logic input_read_started;
|
||||
int eoi_count;
|
||||
string input_pgm_path;
|
||||
string pgm_header_line;
|
||||
|
||||
jpeg_ls_encoder_top #(
|
||||
.PIX_WIDTH(PIX_WIDTH),
|
||||
.DEFAULT_PIC_COL(PIC_COL),
|
||||
.DEFAULT_PIC_ROW(PIC_ROW),
|
||||
.MAX_PIC_COL(PIC_COL),
|
||||
.MAX_PIC_ROW(PIC_ROW),
|
||||
.SCAN_ROWS(SCAN_ROWS),
|
||||
.OUT_BUF_BYTES(512),
|
||||
.OUT_BUF_AFULL_MARGIN(32),
|
||||
.IFIFO_DATA_WIDTH(IFIFO_DATA_WIDTH)
|
||||
) dut (
|
||||
.clk(clk),
|
||||
.rst(rst),
|
||||
.cfg_pic_col(cfg_pic_col),
|
||||
.cfg_pic_row(cfg_pic_row),
|
||||
.ratio(ratio),
|
||||
.ififo_rclk(ififo_rclk),
|
||||
.ififo_rd(ififo_rd),
|
||||
.ififo_rdata(ififo_rdata),
|
||||
.ififo_empty(ififo_empty),
|
||||
.ififo_alempty(ififo_alempty),
|
||||
.ofifo_wclk(ofifo_wclk),
|
||||
.ofifo_wr(ofifo_wr),
|
||||
.ofifo_wdata(ofifo_wdata),
|
||||
.ofifo_full(ofifo_full),
|
||||
.ofifo_alfull(ofifo_alfull)
|
||||
);
|
||||
|
||||
// Free-running simulation clock: inverts clk every 2 time units.
always #2 clk = ~clk;
|
||||
|
||||
// Input FIFO status flags derived from the read pointer of the FIFO model:
// "empty" once every word has been read, "almost empty" from the moment at
// most one word is left.
always_comb begin
  ififo_empty   = (fifo_rd_index >= FIFO_WORD_COUNT);
  ififo_alempty = (fifo_rd_index >= (FIFO_WORD_COUNT - 1));
end
|
||||
|
||||
// Reads lines from file descriptor `fd` until one is found that does not
// start with '#'.
//   fd   : file descriptor to read from.
//   line : receives the line that was read.
// Returns 1 when such a line was read, 0 when $fgets reported that no more
// characters could be read before one was found.
function automatic bit read_non_comment_line(input int fd, output string line);
  line = "";
  while ($fgets(line, fd) != 0) begin
    // Anything that is empty or does not begin with '#' is a payload line.
    if ((line.len() == 0) || (line.getc(0) != "#")) begin
      return 1'b1;
    end
  end
  return 1'b0;
endfunction
|
||||
|
||||
// Main test sequence:
//   1. set defaults and read the plusarg overrides,
//   2. optionally load a binary P5 PGM image (header lines, then payload),
//   3. fill the input FIFO model with IMAGE_COUNT images, marking the first
//      pixel of every image with the SOF bit,
//   4. release reset and wait until the expected number of EOI markers has
//      been observed on the output,
//   5. check sideband/byte counts, compute input throughput, optionally
//      append a CSV stats line, and report PASS.
initial begin
  clk = 1'b0;
  rst = 1'b1;
  cfg_pic_col = PIC_COL[12:0];
  cfg_pic_row = PIC_ROW[12:0];
  ratio = 4'd0;
  ofifo_full = 1'b0;
  ofifo_alfull = 1'b0;

  // Defaults used when the corresponding plusarg is absent.
  pattern_id = 0;
  ratio_id = 0;
  check_throughput = 0;
  trace_entropy = 0;
  input_pgm_path = "";
  jls_output_path = "tools/jls_compat/out/rtl_top_zero_8b.jls";
  stats_output_path = "";
  case_id = "top_smoke";

  void'($value$plusargs("PATTERN=%d", pattern_id));
  void'($value$plusargs("IN_PGM=%s", input_pgm_path));
  void'($value$plusargs("RATIO=%d", ratio_id));
  void'($value$plusargs("OUT=%s", jls_output_path));
  void'($value$plusargs("STATS=%s", stats_output_path));
  void'($value$plusargs("CASE=%s", case_id));
  void'($value$plusargs("CHECK_THROUGHPUT=%d", check_throughput));
  void'($value$plusargs("TRACE_ENTROPY=%d", trace_entropy));

  // The ratio port is 4 bits wide. Reject values that would otherwise be
  // truncated silently, which would make the DUT run with a different ratio
  // than the one recorded in the stats line.
  if ((ratio_id < 0) || (ratio_id > 15)) begin
    $fatal(1, "RATIO plusarg %0d is outside the supported range 0..15", ratio_id);
  end
  ratio = ratio_id[3:0];

  // Optional PGM input: three non-comment header lines (magic, dimensions,
  // maximum value) followed by exactly one image worth of payload bytes.
  if (input_pgm_path != "") begin
    pgm_fd = $fopen(input_pgm_path, "rb");
    if (pgm_fd == 0) begin
      $fatal(1, "Failed to open input PGM file: %s", input_pgm_path);
    end

    if (!read_non_comment_line(pgm_fd, pgm_header_line)) begin
      $fatal(1, "Failed to read PGM magic header from %s", input_pgm_path);
    end
    if ((pgm_header_line.len() < 2) ||
        (pgm_header_line.getc(0) != "P") ||
        (pgm_header_line.getc(1) != "5")) begin
      $fatal(1, "Input file is not a binary P5 PGM: %s", input_pgm_path);
    end

    if (!read_non_comment_line(pgm_fd, pgm_header_line) ||
        ($sscanf(pgm_header_line, "%d %d", pgm_header_width, pgm_header_height) != 2)) begin
      $fatal(1, "Failed to parse PGM dimensions from %s", input_pgm_path);
    end
    if ((pgm_header_width != PIC_COL) || (pgm_header_height != PIC_ROW)) begin
      $fatal(1, "PGM dimensions %0dx%0d do not match testbench generics %0dx%0d for %s",
             pgm_header_width, pgm_header_height, PIC_COL, PIC_ROW, input_pgm_path);
    end

    if (!read_non_comment_line(pgm_fd, pgm_header_line) ||
        ($sscanf(pgm_header_line, "%d", pgm_header_max_value) != 1)) begin
      $fatal(1, "Failed to parse PGM max value from %s", input_pgm_path);
    end
    if (pgm_header_max_value != SAMPLE_MAX_VALUE) begin
      $fatal(1, "PGM max value %0d does not match PIX_WIDTH=%0d sample max %0d for %s",
             pgm_header_max_value, PIX_WIDTH, SAMPLE_MAX_VALUE, input_pgm_path);
    end

    pgm_bytes_read = $fread(pgm_payload, pgm_fd);
    if (pgm_bytes_read != PGM_BYTES_PER_IMAGE) begin
      $fatal(1, "PGM payload bytes %0d do not match expected %0d for %s",
             pgm_bytes_read, PGM_BYTES_PER_IMAGE, input_pgm_path);
    end
    // Anything after the payload means the file does not match the generics.
    if ($fgetc(pgm_fd) != -1) begin
      $fatal(1, "Unexpected trailing data after PGM payload in %s", input_pgm_path);
    end
    $fclose(pgm_fd);
  end

  // Fill the FIFO model. With a PGM input the same payload is reused for
  // every repeated image; otherwise the sample comes from the selected
  // synthetic pattern.
  for (init_index = 0; init_index < FIFO_WORD_COUNT; init_index = init_index + 1) begin
    fifo_mem[init_index] = {IFIFO_DATA_WIDTH{1'b0}};
    image_index = init_index / PIXELS_PER_IMAGE;
    image_pixel_index = init_index % PIXELS_PER_IMAGE;
    pixel_x = image_pixel_index % PIC_COL;
    pixel_y = image_pixel_index / PIC_COL;

    sample_value = 0;
    if (input_pgm_path != "") begin
      pgm_byte_index = image_pixel_index * PGM_BYTES_PER_SAMPLE;
      if (PGM_BYTES_PER_SAMPLE == 1) begin
        sample_value = pgm_payload[pgm_byte_index];
      end else begin
        // Two-byte samples: the first byte is the most significant one.
        sample_value = {pgm_payload[pgm_byte_index], pgm_payload[pgm_byte_index + 1]};
      end
    end else begin
      // PATTERN=9 cycles through pattern numbers 0..9, one per image.
      pattern_select = pattern_id;
      if (pattern_id == 9) begin
        pattern_select = image_index % 10;
      end

      case (pattern_select)
        1: begin
          // Row-major ramp.
          sample_value = image_pixel_index & SAMPLE_MAX_VALUE;
        end
        2: begin
          // Checkerboard.
          if ((pixel_x[0] ^ pixel_y[0]) != 0) begin
            sample_value = SAMPLE_MAX_VALUE;
          end
        end
        3: begin
          // Vertical edge at half the image width.
          if (pixel_x >= (PIC_COL / 2)) begin
            sample_value = SAMPLE_MAX_VALUE;
          end
        end
        4: begin
          // Deterministic pseudo-noise/texture.
          sample_value = ((pixel_x * 13) + (pixel_y * 29) +
                          ((pixel_x ^ pixel_y) * 7) + (image_index * 17)) &
                         SAMPLE_MAX_VALUE;
        end
        5: begin
          // Inverted row-major ramp.
          sample_value = SAMPLE_MAX_VALUE - (image_pixel_index & SAMPLE_MAX_VALUE);
        end
        6: begin
          // Diagonal edge.
          if (pixel_x >= pixel_y) begin
            sample_value = SAMPLE_MAX_VALUE;
          end
        end
        7: begin
          // Coarse diagonal gradient.
          sample_value = ((pixel_x + pixel_y + image_index) >> 3) & SAMPLE_MAX_VALUE;
        end
        8: begin
          // Vertical stripes selected by bit 3 of x.
          if (pixel_x[3] != 0) begin
            sample_value = SAMPLE_MAX_VALUE;
          end
        end
        9: begin
          // Index-based pseudo-noise that also varies per image.
          sample_value = ((image_pixel_index * 37) + (image_index * 101) +
                          ((image_pixel_index >> 3) * 11)) & SAMPLE_MAX_VALUE;
        end
        default: begin
          // Pattern 0 and any unknown pattern number: all-zero image.
          sample_value = 0;
        end
      endcase
    end

    fifo_mem[init_index][PIX_WIDTH-1:0] = sample_value[PIX_WIDTH-1:0];

    // Mark the first pixel of every image with the start-of-frame bit.
    if (image_pixel_index == 0) begin
      fifo_mem[init_index][SOF_BIT_INDEX] = 1'b1;
    end
  end

  jls_fd = $fopen(jls_output_path, "wb");
  if (jls_fd == 0) begin
    $fatal(1, "Failed to open RTL top smoke output file");
  end

  repeat (5) @(posedge clk);
  rst = 1'b0;

  // Wait for all expected strip frames, then leave a few cycles of margin
  // before evaluating the counters.
  wait (eoi_count == EXPECTED_FRAME_COUNT);
  repeat (32) @(posedge clk);

  if (image_start_sideband_count !== IMAGE_COUNT) begin
    $fatal(1, "Expected %0d original-image-start sideband bytes, got %0d",
           IMAGE_COUNT, image_start_sideband_count);
  end

  if (output_byte_count < 32) begin
    $fatal(1, "Expected a nontrivial JPEG-LS strip frame, got only %0d bytes",
           output_byte_count);
  end

  input_active_cycles = input_last_read_cycle - input_first_read_cycle + 1;
  if (input_active_cycles <= 0) begin
    $fatal(1, "Input throughput statistic is invalid: input_cycles=%0d",
           input_active_cycles);
  end

  // reads / cycles * 250 MHz, reported in units of 0.001 MPixel/s.
  throughput_mpix_x1000 = (longint'(input_read_count) * 250000) / input_active_cycles;
  // 200 MPixel/s at 250 MHz means reads / cycles >= 4 / 5; compare the
  // cross-multiplied integers to avoid a division.
  throughput_left_side = longint'(input_read_count) * 5;
  throughput_right_side = input_active_cycles * 4;

  if ((check_throughput != 0) && (throughput_left_side < throughput_right_side)) begin
    $fatal(1, "Input throughput below 200 MPixel/s: reads=%0d cycles=%0d mpix_x1000=%0d",
           input_read_count, input_active_cycles, throughput_mpix_x1000);
  end

  if (stats_output_path != "") begin
    stats_fd = $fopen(stats_output_path, "a");
    if (stats_fd == 0) begin
      $fatal(1, "Failed to open throughput stats output file");
    end
    $fdisplay(stats_fd, "%s,%0d,%0d,%0d,%0d,%0d,%0d,%0d,%0d,%0d,%0d,%0d",
              case_id, PIX_WIDTH, PIC_COL, PIC_ROW, IMAGE_COUNT, ratio_id,
              pattern_id, eoi_count, output_byte_count, input_read_count,
              input_active_cycles, throughput_mpix_x1000);
    $fclose(stats_fd);
  end

  $display("PASS: tb_jpeg_ls_encoder_top_run_smoke pattern=%0d images=%0d frames=%0d bytes=%0d input_reads=%0d input_cycles=%0d throughput_mpix_x1000=%0d",
           pattern_id, IMAGE_COUNT, eoi_count, output_byte_count, input_read_count,
           input_active_cycles, throughput_mpix_x1000);
  $fclose(jls_fd);
  $finish;
end
|
||||
|
||||
// Input FIFO model with read statistics. An accepted read (ififo_rd while
// the model is not empty) registers the next word onto ififo_rdata, advances
// the read pointer and records the cycle numbers of the first and the most
// recent read. cycle_count advances on every clock edge outside reset.
always_ff @(posedge clk) begin
  if (rst) begin
    fifo_rd_index          <= 0;
    ififo_rdata            <= {IFIFO_DATA_WIDTH{1'b0}};
    cycle_count            <= 0;
    input_read_count       <= 0;
    input_read_started     <= 1'b0;
    input_first_read_cycle <= 0;
    input_last_read_cycle  <= 0;
  end else begin
    cycle_count <= cycle_count + 1;

    if (ififo_rd && !ififo_empty) begin
      ififo_rdata           <= fifo_mem[fifo_rd_index];
      fifo_rd_index         <= fifo_rd_index + 1;
      input_read_count      <= input_read_count + 1;
      input_last_read_cycle <= cycle_count;

      // Latch the cycle number of the very first accepted read only.
      if (!input_read_started) begin
        input_first_read_cycle <= cycle_count;
        input_read_started     <= 1'b1;
      end
    end
  end
end
|
||||
|
||||
// Output scoreboard: writes every output byte to the .jls file, counts bytes,
// image-start sideband flags and EOI markers (0xFF followed by 0xD9), and
// requires the very first output word to be 0xFF with the sideband bit set.
always_ff @(posedge clk) begin
  if (rst) begin
    eoi_count                  <= 0;
    last_output_was_ff         <= 1'b0;
    first_output_seen          <= 1'b0;
    image_start_sideband_count <= 0;
    output_byte_count          <= 0;
  end else if (ofifo_wr) begin
    $fwrite(jls_fd, "%c", ofifo_wdata[7:0]);
    output_byte_count <= output_byte_count + 1;

    // Bit 8 flags the start of an original input image.
    if (ofifo_wdata[8]) begin
      image_start_sideband_count <= image_start_sideband_count + 1;
    end

    if (!first_output_seen) begin
      first_output_seen <= 1'b1;
      if (ofifo_wdata !== 9'h1FF) begin
        $fatal(1, "First output byte should be SOI marker prefix with sideband, got 0x%03h",
               ofifo_wdata);
      end
    end

    // Remember whether this byte was 0xFF so that the next byte can be
    // recognised as the second byte of an EOI marker.
    last_output_was_ff <= (ofifo_wdata[7:0] == 8'hFF);
    if (last_output_was_ff && ofifo_wdata[7:0] == 8'hD9) begin
      eoi_count <= eoi_count + 1;
    end
  end
end
|
||||
|
||||
// Optional debug trace, enabled by a non-zero trace_entropy: prints one TRACE
// line for each accepted valid/ready handshake on the probed DUT-internal
// interfaces, using hierarchical references into the DUT.
always_ff @(posedge clk) begin
  if (!rst && trace_entropy != 0) begin
    // Regular-mode sample entering the regular path.
    if (dut.mode_regular_valid && dut.mode_regular_ready) begin
      $display("TRACE regular x=%0d y=%0d X=%0d Ra=%0d Rb=%0d Rc=%0d Rd=%0d regpend=%0d",
               dut.mode_regular_x,
               dut.mode_regular_y,
               dut.mode_regular_sample,
               dut.mode_regular_Ra,
               dut.mode_regular_Rb,
               dut.mode_regular_Rc,
               dut.mode_regular_Rd,
               dut.regular_entropy_pending_count);
    end

    // Run segment handed to the run path.
    if (dut.mode_run_segment_valid && dut.mode_run_segment_ready) begin
      $display("TRACE runseg len=%0d eol=%0d X=%0d x=%0d y=%0d Ra=%0d Rb=%0d regpend=%0d raw_ready=%0d",
               dut.mode_run_length,
               dut.mode_run_end_of_line,
               dut.mode_run_interruption_sample,
               dut.mode_run_interruption_x,
               dut.mode_run_interruption_y,
               dut.mode_run_Ra,
               dut.mode_run_Rb,
               dut.regular_entropy_pending_count,
               dut.run_core_segment_ready);
    end

    // Run-length code bits.
    if (dut.run_code_valid && dut.run_code_ready) begin
      $display("TRACE runcode count=%0d bits=0x%016h regpend=%0d",
               dut.run_code_bit_count,
               dut.run_code_bits,
               dut.regular_entropy_pending_count);
    end

    // Mapped error value of the run path.
    if (dut.run_mapped_valid && dut.run_mapped_ready) begin
      $display("TRACE runmap M=%0d k=%0d limit=%0d qbpp=%0d runidx=%0d Jcur=%0d Jreg=%0d",
               dut.run_MErrval,
               dut.run_mapped_k,
               dut.run_mapped_limit,
               dut.run_mapped_qbpp,
               dut.run_mode_i.RUNindex,
               dut.run_mode_i.J_value,
               dut.run_mode_i.run_remainder_j_reg);
    end

    // Mapped error value of the regular path.
    if (dut.regular_mapped_valid && dut.regular_mapped_ready) begin
      $display("TRACE regmap M=%0d k=%0d limit=%0d qbpp=%0d",
               dut.regular_MErrval,
               dut.regular_mapped_k,
               dut.regular_mapped_limit,
               dut.regular_mapped_qbpp);
    end

    // Code bits on the dut.code_* interface.
    if (dut.code_valid && dut.code_ready) begin
      $display("TRACE code count=%0d bits=0x%016h",
               dut.code_bit_count,
               dut.code_bits);
    end
  end
end
|
||||
|
||||
// Watchdog: abort if the expected EOI markers have not all been seen within
// SIM_TIMEOUT_CYCLES rising clock edges.
initial begin
  repeat (SIM_TIMEOUT_CYCLES) begin
    @(posedge clk);
  end
  $fatal(1, "Timeout waiting for top-level run smoke EOI");
end
|
||||
|
||||
endmodule
|
||||
|
||||
`default_nettype wire
|
||||
1118
fpga/srs/SystemVerilog coding style.md
Normal file
1118
fpga/srs/SystemVerilog coding style.md
Normal file
File diff suppressed because it is too large
Load Diff
765
fpga/srs/jpeg_ls.md
Normal file
765
fpga/srs/jpeg_ls.md
Normal file
@@ -0,0 +1,765 @@
|
||||
# JPEG-LS FPGA IP 核需求规格说明
|
||||
|
||||
## 1. 概述
|
||||
|
||||
### 1.1 项目目标
|
||||
|
||||
基于 ITU-T T.87 / ISO/IEC 14495-1 JPEG-LS Baseline 标准,使用
|
||||
SystemVerilog 实现 JPEG-LS 图像压缩 FPGA IP 核。目标器件为 Xilinx
|
||||
Virtex-7 XC7V690T,目标时钟频率为 250 MHz。
|
||||
|
||||
本 IP 面向灰度图像编码,输入处理流水线的峰值目标为 1 像素/周期,即
|
||||
250 MPixel/s。由于输出接口固定为 9 bit 字节流,端到端持续吞吐率受实际压缩
|
||||
码率、字节填充、头部开销和内部输出缓冲状态限制;当内部输出缓冲接近满时,
|
||||
允许暂停输入 FIFO 读取。
|
||||
|
||||
### 1.2 参考资源
|
||||
|
||||
- 标准:ITU-T T.87 / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
- 参考实现:https://github.com/team-charls/charls
|
||||
- jpeg.org 列出的 libjpeg 参考库:https://github.com/thorfdbg/libjpeg/tree/master
|
||||
- 动态误差控制参考:https://patents.google.com/patent/CN102088602A/zh
|
||||
- 目标芯片:Xilinx Virtex-7 XC7V690T
|
||||
- 目标频率:250 MHz
|
||||
- 设计图:`fpga/srs/jpeg_ls_design.drawio`,包含软件框图和工作流程图
|
||||
|
||||
后续需求评审中新增或变更的用户要求,必须及时补充进入本文档,避免对话上下文
|
||||
丢失导致实现偏离需求。项目推进原则为:尽早提出需要用户确认的问题;非必要不
|
||||
停止推进;若后续实现过程中发现会改变接口、码流结构、性能目标、验证准则或资源
|
||||
边界的事项,必须先提出评审。
|
||||
|
||||
### 1.3 支持范围
|
||||
|
||||
- 仅支持灰度图像,JPEG-LS 帧头中的 component 数量固定为 1。
|
||||
- 支持像素位宽:8、10、12、14、16 bit。
|
||||
- 第一版仅实现 encoder RTL,不实现 FPGA decoder。
|
||||
- 输出必须是标准 JPEG-LS 码流;第一版将一幅输入图像拆成多个水平条带,
|
||||
每个条带输出一个完整 `SOI ... EOI` JPEG-LS frame,多个 frame 按条带顺序
|
||||
连续输出。
|
||||
- 不输出 SPIFF header。
|
||||
- 支持 JPEG-LS regular mode 和 run mode。
|
||||
- 不插入 restart markers,不输出 DRI 段。
|
||||
- 通过独立条带 frame 结构支持分段动态调整 `NEAR`。
|
||||
- 每个条带覆盖完整行宽,默认每 16 行一个条带;每个条带 frame 内只包含一个
|
||||
JPEG-LS scan。
|
||||
- 每个条带 frame 开始时重置 JPEG-LS 上下文、行缓存边界和 run mode 状态。
|
||||
- 每个条带 frame 的 `SOS` 前显式输出 LSE preset coding parameters 段,参数值采用标准默认
|
||||
`MAXVAL/T1/T2/T3/RESET` 计算结果。
|
||||
- 每个条带 frame 的 `SOS` 段写入该条带当前使用的 `NEAR`。
|
||||
|
||||
### 1.4 码流结构决策与兼容性验证
|
||||
|
||||
第一版码流结构确定为:每个水平条带输出一个独立完整 JPEG-LS frame,多个 frame
|
||||
按条带顺序连续输出。每个条带 frame 的图像宽度等于 `active_pic_col`,图像高度等于
|
||||
`SCAN_ROWS`,component 数量固定为 1,且该 frame 内只包含一个 JPEG-LS scan。
|
||||
|
||||
外部 `ofifo_wdata[8]` 的帧开始标志只在原始输入图像的第一个条带的 `SOI` 第一个
|
||||
字节上置 1,避免外部系统将每个条带误判为一幅新图像。
|
||||
外部系统不依赖 `cfg_pic_row` 或 `SCAN_ROWS` 做分组,而是通过
|
||||
`ofifo_wdata[8]` 识别原始图像开始;同一原始图像的后续条带 frame 仍可通过
|
||||
JPEG-LS frame header 解析出条带宽高,并按输出顺序拼回原图。
|
||||
第一版不提供原始图像结束 sideband。外部系统通过下一次
|
||||
`ofifo_wdata[8]=1` 或输出流空/超时判断上一幅原始图像已经结束。
|
||||
|
||||
每个条带 frame 的 `SOS` 前输出 LSE preset coding parameters 段。若兼容性实验表明该
|
||||
位置不被 CharLS 或标准语法接受,则允许改为仅在标准允许位置输出 LSE,或不
|
||||
输出 LSE 并使用标准默认参数。
|
||||
`jls_header_writer` 架构必须保留 LSE 输出策略调整空间,不能把每条带输出
|
||||
LSE 固化为无法修改的路径。
|
||||
|
||||
兼容性实验工具放在 `tools/jls_compat`,CharLS 源码作为第三方参考工具放在
|
||||
`third_party/charls`。`tools/jls_compat/duplicate_sos_probe.py` 已使用
|
||||
imagecodecs/CharLS 2.4.1 验证:原始单 component 码流可解码,在同一 frame 中插入
|
||||
第二个 `SOS` 后被解码器拒绝。因此第一版 RTL 不采用单 frame 多 scan 水平条带结构;
|
||||
若后续需要重新尝试单 frame 多 scan,必须先提供独立的标准依据和 CharLS 3.x 动态
|
||||
解码通过结果。
|
||||
|
||||
## 2. 系统参数
|
||||
|
||||
| 参数名 | 类型 | 默认值 | 合法范围 | 说明 |
|
||||
| --- | --- | ---: | --- | --- |
|
||||
| `PIX_WIDTH` | int | 16 | 8, 10, 12, 14, 16 | 编译期像素位宽 |
|
||||
| `DEFAULT_PIC_COL` | int | 6144 | 16..6144 | 默认图像宽度 |
|
||||
| `DEFAULT_PIC_ROW` | int | 256 | 16..4096 | 默认图像高度 |
|
||||
| `MAX_PIC_COL` | int | 6144 | 6144 | 最大图像宽度 |
|
||||
| `MAX_PIC_ROW` | int | 4096 | 4096 | 最大图像高度 |
|
||||
| `SCAN_ROWS` | int | 16 | 1..4096 | 每个条带 frame 的行数 |
|
||||
| `MAX_NEAR` | int | 31 | 31 | 动态 `NEAR` 最大值 |
|
||||
| `OUT_BUF_BYTES` | int | 8192 | 实现相关 | 内部输出缓冲字节数 |
|
||||
| `OUT_BUF_AFULL_MARGIN` | int | 256 | 实现相关 | 输出缓冲暂停输入的余量 |
|
||||
|
||||
约束:
|
||||
|
||||
- `PIX_WIDTH` 不是运行时配置项,综合时固定。
|
||||
- 运行时图像尺寸由 `cfg_pic_col` 和 `cfg_pic_row` 决定。
|
||||
- 若任一尺寸配置非法,则整帧使用默认尺寸 `DEFAULT_PIC_COL x
|
||||
DEFAULT_PIC_ROW`。
|
||||
- 非法尺寸包括:宽度为 0、高度为 0、宽度小于 16、高度小于 16、宽度大于
|
||||
`MAX_PIC_COL`、高度大于 `MAX_PIC_ROW`、高度不能被 `SCAN_ROWS` 整除。
|
||||
- 上游应保证有效图像高度是 `SCAN_ROWS` 的整数倍。
|
||||
- `MAXVAL = 2^PIX_WIDTH - 1`,输入保留位不参与 `MAXVAL` 计算。
|
||||
|
||||
## 3. 端口定义
|
||||
|
||||
### 3.1 时钟与复位
|
||||
|
||||
| 标识 | 方向 | 位宽 | 名称 | 说明 |
|
||||
| --- | --- | ---: | --- | --- |
|
||||
| `clk` | 输入 | 1 | 主时钟 | 250 MHz,上升沿有效 |
|
||||
| `rst` | 输入 | 1 | 复位 | 同步复位,高电平有效 |
|
||||
|
||||
### 3.2 运行时配置
|
||||
|
||||
| 标识 | 方向 | 位宽 | 名称 | 说明 |
|
||||
| --- | --- | ---: | --- | --- |
|
||||
| `cfg_pic_col` | 输入 | 13 | 图像宽度 | SOF 像素被接收时采样,0 或非法值使用默认尺寸 |
|
||||
| `cfg_pic_row` | 输入 | 13 | 图像高度 | SOF 像素被接收时采样,0 或非法值使用默认尺寸 |
|
||||
| `ratio` | 输入 | 4 | 目标压缩比 | SOF 像素被接收时采样,整帧保持不变 |
|
||||
|
||||
`ratio` 编码:
|
||||
|
||||
| `ratio` | 目标压缩比 | `NEAR` 策略 |
|
||||
| ---: | --- | --- |
|
||||
| 0 | 1:1 | 强制无损,`NEAR=0`,不做动态增大 |
|
||||
| 1 | 1:2 | 动态调整 `NEAR` |
|
||||
| 2 | 1:4 | 动态调整 `NEAR` |
|
||||
| 3 | 1:8 | 动态调整 `NEAR` |
|
||||
| 其他 | 1:1 | 按 `ratio=0` 处理 |
|
||||
|
||||
配置时序要求:
|
||||
|
||||
- `cfg_pic_col`、`cfg_pic_row`、`ratio` 必须在 SOF 前至少 1 个 `clk` 稳定。
|
||||
- 配置必须在当前帧处理期间保持不变;帧内变化属于上游协议错误,RTL 不保证
|
||||
输出正确。
|
||||
|
||||
### 3.3 输入 FIFO 接口
|
||||
|
||||
| 标识 | 方向 | 位宽 | 名称 | 说明 |
|
||||
| --- | --- | ---: | --- | --- |
|
||||
| `ififo_rclk` | 输出 | 1 | 输入 FIFO 读时钟 | 与 `clk` 同频同相 |
|
||||
| `ififo_rd` | 输出 | 1 | 输入 FIFO 读使能 | 高电平有效 |
|
||||
| `ififo_rdata` | 输入 | `ceil(PIX_WIDTH/8)*9` | 输入 FIFO 数据 | 见输入打包格式 |
|
||||
| `ififo_empty` | 输入 | 1 | 输入 FIFO 空 | 高电平有效 |
|
||||
| `ififo_alempty` | 输入 | 1 | 输入 FIFO 即将空 | 高电平有效,用于读取优化 |
|
||||
|
||||
输入 FIFO 采用标准同步 FIFO 时序:`ififo_rd` 有效后,下一周期
|
||||
`ififo_rdata` 有效。
|
||||
|
||||
读取策略:
|
||||
|
||||
- 当 `ififo_alempty=0` 时,允许连续读取以提高吞吐率。
|
||||
- 当 `ififo_alempty=1` 时,转为保守读取模式,每次读取前检查
|
||||
`ififo_empty=0`。
|
||||
- 当 `ififo_empty=1` 或内部输出缓冲接近满时,`ififo_rd=0`,暂停读取输入
|
||||
FIFO。
|
||||
- 暂停期间保持当前图像坐标、编码上下文和已读像素处理状态;恢复后继续。
|
||||
- 已经从 FIFO 读出的当前像素必须完成编码并进入内部输出缓冲。
|
||||
|
||||
输入像素顺序固定为从左到右、从上到下的逐行扫描顺序。每帧输入
|
||||
`active_pic_col * active_pic_row` 个像素,中间允许因 FIFO 空或内部输出缓冲
|
||||
接近满而暂停,但不允许乱序、跳行或缺失像素。
|
||||
|
||||
JPEG-LS 邻域边界扩展按标准行缓存语义处理。条带顶行的上一行样本
|
||||
按 0 处理;左边界 `x=0` 时 `Ra=Rb`,`Rc` 使用上一行的左边界扩展
|
||||
样本,而不是固定为 0;右边界时 `Rd=Rb`。该规则用于保证与 CharLS
|
||||
等标准参考实现的码流兼容性。
|
||||
|
||||
SOF 协议:
|
||||
|
||||
- 编码器只在检测到输入像素的 SOF 标志为 1 后开始一帧图像处理。
|
||||
- 一帧图像的第一个像素 SOF 标志为 1,其余像素 SOF 标志必须为 0。
|
||||
- 等待帧开始时若收到 SOF 为 0 的像素,继续等待有效 SOF。
|
||||
- 一帧未收满 `active_pic_col * active_pic_row` 个像素时又收到新的 SOF,
|
||||
属于上游协议错误,RTL 不保证输出正确。
|
||||
|
||||
输入打包格式:
|
||||
|
||||
- `PIX_WIDTH=8`:`ififo_rdata[8]` 为 SOF,`ififo_rdata[7:0]` 为像素值。
|
||||
- `PIX_WIDTH=10/12/14/16`:`ififo_rdata[17]` 为 SOF,
|
||||
`ififo_rdata[16]` 为保留位,`ififo_rdata[PIX_WIDTH-1:0]` 为像素值。
|
||||
- 对于 10/12/14 bit,`ififo_rdata[15:PIX_WIDTH]` 为保留位。
|
||||
- 按需求,所有保留位必须为 0;RTL 不对保留位做检查或处理。
|
||||
- 输入像素作为数值处理,无需定义输入字节序。
|
||||
|
||||
### 3.4 输出 FIFO 接口
|
||||
|
||||
| 标识 | 方向 | 位宽 | 名称 | 说明 |
|
||||
| --- | --- | ---: | --- | --- |
|
||||
| `ofifo_wclk` | 输出 | 1 | 输出 FIFO 写时钟 | 与 `clk` 同频同相 |
|
||||
| `ofifo_wr` | 输出 | 1 | 输出 FIFO 写使能 | 高电平有效 |
|
||||
| `ofifo_wdata` | 输出 | 9 | 输出 FIFO 数据 | `[8]` 为帧开始标志,`[7:0]` 为输出字节 |
|
||||
| `ofifo_full` | 输入 | 1 | 输出 FIFO 满 | 保留端口,第一版忽略 |
|
||||
| `ofifo_alfull` | 输入 | 1 | 输出 FIFO 即将满 | 保留端口,第一版忽略 |
|
||||
|
||||
输出协议:
|
||||
|
||||
- `ofifo_wdata[7:0]` 按标准 JPEG-LS marker stream 的字节顺序输出。
|
||||
- marker 按大端字节顺序输出,例如 `SOI=0xFF,0xD8` 时先输出 `0xFF`。
|
||||
- 每幅输入图像的第一个条带 frame 的 `SOI` 第一个字节输出时,`ofifo_wdata[8]`
|
||||
置 1。
|
||||
- 同一幅输入图像的后续字节,包括后续条带 frame 的 `SOI/SOF55/LSE/SOS/EOI`
|
||||
字节,`ofifo_wdata[8]` 均置 0。
|
||||
- 连续多帧图像输出时,每幅输入图像的第一个条带 frame 的第一个 `SOI` 字节均
|
||||
置 1。
|
||||
- `ofifo_full` 和 `ofifo_alfull` 第一版完全忽略;系统集成必须保证外部输出
|
||||
FIFO 能持续接收写入。
|
||||
- 若前级输入处理不忙,允许在上一帧码流尚未完全从 `ofifo` 输出时接收下一帧
|
||||
输入;输出字节流必须严格按输入帧顺序排队输出,不允许帧间字节交错。
|
||||
- 输出队列仅使用 `ofifo_wdata[8]` 随字节流标记原始输入图像的开始,不维护额外
|
||||
帧元数据 sideband。
|
||||
|
||||
## 4. 压缩比控制
|
||||
|
||||
### 4.1 目标统计
|
||||
|
||||
压缩比目标以 bit 为单位计算,不按字节向上取整:
|
||||
|
||||
- 原始输入 bit 数:`processed_pixel_count * PIX_WIDTH`。
|
||||
- `ratio=1` 的目标 bit 数:原始输入 bit 数 / 2。
|
||||
- `ratio=2` 的目标 bit 数:原始输入 bit 数 / 4。
|
||||
- `ratio=3` 的目标 bit 数:原始输入 bit 数 / 8。
|
||||
- 实际输出 bit 数:当前原始输入图像已生成并写入内部输出缓冲的所有条带
|
||||
JPEG-LS frame 码流字节数 * 8。
|
||||
|
||||
实际输出统计包含完整输出码流中的所有字节,包括 `SOI`、`SOF55`、`LSE`、
|
||||
`SOS`、熵编码 payload、byte stuffing、`EOI` 和其它必要 marker。
|
||||
|
||||
### 4.2 动态 `NEAR` 调节
|
||||
|
||||
- 每帧开始时 `NEAR` 初值为 0。
|
||||
- `ratio=0` 或非法 ratio 时,整帧 `NEAR` 固定为 0,不进行动态调整。
|
||||
- `ratio=1/2/3` 时,每个条带 frame 结束后比较累计实际输出 bit 数与累计目标
|
||||
bit 数,并调整下一条带 frame 的 `NEAR`。
|
||||
- 第一版调节策略:
|
||||
- 若累计实际输出 bit 数大于累计目标 bit 数,`NEAR = NEAR + 1`。
|
||||
- 若累计实际输出 bit 数小于累计目标 bit 数且 `NEAR > 0`,
|
||||
`NEAR = NEAR - 1`。
|
||||
- 若二者相等,`NEAR` 保持不变。
|
||||
- `NEAR` 钳位到 `0..31`。
|
||||
- 如果第一版效果不理想,再参考 CN102088602A 所述方法优化调节步长和累计偏差
|
||||
控制策略。
|
||||
|
||||
### 4.3 误差验收
|
||||
|
||||
- 对 `ratio=0`,要求 CharLS 和 libjpeg 参考库解码结果均与原图逐像素一致。
|
||||
- 对 `ratio=1/2/3`,每个像素的重建误差不得超过该像素所属条带 frame 的实际
|
||||
`NEAR`;全帧最大误差也必须不超过 31。
|
||||
- `ratio=1/2/3` 的压缩比目标误差阈值为 10%。如果 `NEAR` 已经达到 31 后
|
||||
仍无法满足目标压缩比误差,验证报告标记为 FAIL,但 RTL 不提供错误端口。
|
||||
- 验证报告需要记录每个条带 frame 的 `NEAR`、累计实际输出 bit 数和累计目标 bit
|
||||
数。
|
||||
- 第一版只报告 PSNR 等图像质量指标,不将其作为约束。
|
||||
|
||||
## 5. JPEG-LS 码流要求
|
||||
|
||||
- 每个条带 frame 输出完整标准 JPEG-LS marker stream:`SOI ... EOI`。
|
||||
- 灰度图像 component 数量固定为 1,component id 固定为 1。
|
||||
- 每个条带 frame 的 `SOF55` 中样本精度等于 `PIX_WIDTH`,宽度等于
|
||||
`active_pic_col`,高度等于 `SCAN_ROWS`。
|
||||
- `MAXVAL = 2^PIX_WIDTH - 1`。
|
||||
- 每个条带 frame 的 `SOS` 前输出 LSE preset coding parameters 段,参数值为
|
||||
标准默认参数;即使相邻条带 frame 的 `NEAR` 未变化,也照常输出。
|
||||
- 若兼容性实验表明该 LSE 输出方式不符合 CharLS 或标准语法要求,则按
|
||||
1.4 节的兼容性策略调整。
|
||||
- 每个条带 frame 的 `SOS` header 中写入该条带当前 `NEAR`。
|
||||
- 条带 frame 的 scan 结束进入 `EOI` marker 前,bit packer 必须按 JPEG-LS 规则补齐到字节
|
||||
边界,补齐产生的字节计入输出统计。
|
||||
- 熵编码段必须实现 JPEG-LS marker/zero-bit stuffing;stuffing 字节计入输出
|
||||
统计。
|
||||
- 禁止将 JPEG-LS marker/zero-bit stuffing 简化为传统 JPEG 的 0xFF 后插入
|
||||
0x00 byte stuffing;实现必须按 JPEG-LS 标准位填充规则处理。
|
||||
- 不输出 SPIFF header。
|
||||
- 不输出 DRI 段,不插入 restart markers。
|
||||
- 不实现 raw bypass;即使 `ratio=0` 时 JPEG-LS 码流大于原始图像,也继续输出
|
||||
标准 JPEG-LS 无损码流。
|
||||
|
||||
## 6. 模块架构
|
||||
|
||||
顶层模块名采用 `jpeg_ls_encoder_top`,内部模块统一使用 `jls_` 前缀。
|
||||
|
||||
建议模块划分:
|
||||
|
||||
| 模块 | 功能 |
|
||||
| --- | --- |
|
||||
| `jpeg_ls_encoder_top` | 顶层互连,只例化子模块 |
|
||||
| `jls_input_ctrl` | 输入 FIFO 读取、SOF 检测、尺寸与 ratio 采样 |
|
||||
| `jls_scan_ctrl` | 图像坐标、条带 frame 划分、条带边界重置控制 |
|
||||
| `jls_header_writer` | `SOI/SOF55/LSE/SOS/EOI` 等 marker 输出 |
|
||||
| `jls_predictor` | MED 预测器与边界处理 |
|
||||
| `jls_context_model` | regular mode 上下文建模与统计更新 |
|
||||
| `jls_run_mode` | run mode 检测、run 长度编码与中断样本处理 |
|
||||
| `jls_golomb_encoder` | 映射误差与 Golomb-Rice 编码 |
|
||||
| `jls_bit_packer` | 变长码 bit 打包、JPEG-LS marker/zero-bit stuffing |
|
||||
| `jls_near_ctrl` | 条带 frame 级动态 `NEAR` 统计与更新 |
|
||||
| `jls_output_buffer` | 内部输出缓冲、按帧顺序输出到 9 bit FIFO |
|
||||
|
||||
近损模式下,预测历史、上下文更新和行缓存必须使用编码端重建像素值,而不是原始
|
||||
输入像素值,以保证与标准 JPEG-LS 解码端一致。
|
||||
|
||||
为提高流水线吞吐,允许使用原始像素值、旧行缓存值或其它可提前获得的值进行
|
||||
梯度、预测、context 读地址、run/regular mode 判断等投机预计算或预取;但这些
|
||||
投机结果不得直接提交为最终码流、上下文更新或重建历史。最终提交的 `Ra/Rb/Rc/Rd`、
|
||||
`Px`、`Q1/Q2/Q3`、context index、run/regular mode 选择、`Errval/MErrval/k` 和
|
||||
`A/B/C/N/Nn/RUNindex` 更新必须与真实编码端重建像素历史重新计算或校验后保持一致。
|
||||
若投机结果与真实重建邻域不一致,RTL 必须丢弃投机结果并重算、重放或暂停,禁止
|
||||
为了实现无气泡流水而输出非标准 JPEG-LS 码流。`NEAR=0` 无损场景中原始值与重建值
|
||||
理论等价,仍应在实现说明中明确等价依据;该等价不得推广到 `NEAR>0` 场景。
|
||||
当前 RTL 允许在 `NEAR=0` lossless strip 中将输入样本 `X` 立即提交为 line history
|
||||
中的 `Rx`,以减少重建反馈气泡;`NEAR>0` near-lossless strip 必须等待真实重建
|
||||
样本或使用经校验/重放保证等价的流水机制。
|
||||
regular mode 中,`jls_regular_error_quantizer` 在标准 Annex A.4.4/A.4.5 的 `Errval`
|
||||
量化和 modulo 规范化后已经得到真实重建样本 `Rx`;后续 Annex A.6 context
|
||||
更新、`MErrval` 映射和 Golomb 编码不会再改变该 `Rx`。因此 RTL 允许在 regular
|
||||
误差量化结果被接受后立即把 `Rx` 回写到 line history,以缩短 `NEAR>0`
|
||||
反馈等待;但 context 写回、`MErrval/k`、Golomb 码字和 bitstream 顺序仍必须按
|
||||
逐像素标准语义提交,禁止因为提前回写 `Rx` 而重排熵编码事件。
|
||||
`NEAR>0` 下,若重建写回像素不是行尾,且输入端当前等待的是同一行的下一列像素,
|
||||
`jls_neighbor_provider` 允许在同一时钟沿接受该下一像素,并把刚返回的 `Rx`
|
||||
旁路作为下一像素的 `Ra`。该优化仅适用于同一行连续像素;行尾到下一行 `x=0`
|
||||
的 bank 切换、`left_edge_Rc` 和 `row_left_Rb` 更新必须先完成,不能做同周期
|
||||
旁路。
|
||||
|
||||
内部输出缓冲仅用于吸收变长编码和单字节输出接口之间的短时速率差异,不保证在
|
||||
最坏无损图像下维持全帧输入不暂停。当输出缓冲剩余空间小于
|
||||
`OUT_BUF_AFULL_MARGIN` 时,输入读取逻辑必须暂停。`OUT_BUF_BYTES` 默认值为
|
||||
8192 bytes;如性能验证需要调整该参数,必须经需求评审确认。
|
||||
`OUT_BUF_AFULL_MARGIN` 默认值为 256 bytes,实现前必须估算 header burst、
|
||||
LSE/SOS、极端 Golomb 长码和 bit packer flush 等单次不可暂停最大写入量;若
|
||||
默认值不足,需提交评审后调大。
|
||||
|
||||
### 6.1 高流水线实现约束
|
||||
|
||||
为满足 250 MHz 时钟和 200 MPixel/s 连续输入吞吐目标,RTL 实现必须优先采用高
|
||||
流水线架构:
|
||||
|
||||
- 关键数据通路按预测、上下文建模、误差映射、Golomb 参数计算、熵编码、bit
|
||||
打包和输出缓冲等阶段拆分流水。
|
||||
- 每级流水的组合逻辑深度应尽量控制在较小范围内,避免跨多个算法步骤形成长
|
||||
组合路径。
|
||||
- 上下文表读写、line buffer 访问、Golomb 参数更新和 bit packer 累积状态更新
|
||||
应使用寄存器切分关键路径。
|
||||
- 上下文统计表读写必须保持 JPEG-LS 逐像素标准语义。连续像素访问同一
|
||||
context 且流水线存在读写冲突时,优先使用 bypass/forwarding 将更新后的
|
||||
`A/B/C/N` 等统计值转发给后续像素;若极端冲突无法旁路解决,允许插入暂停
|
||||
周期并计入吞吐统计。禁止为追求吞吐率读取旧 context,导致码流与标准语义不
|
||||
一致。
|
||||
- 允许采用投机流水隐藏“梯度计算依赖重建值、重建值又由后级产生”的闭环等待:
|
||||
前级可基于原始值或旧值先行预计算,但提交前必须基于真实重建邻域进行校验。
|
||||
校验一致时可使用投机结果;校验失败时必须重算或重放该像素,相关暂停计入
|
||||
吞吐统计。禁止把投机结果作为标准状态直接更新,以换取表面无气泡流水。
|
||||
- 优先使用时序逻辑保存中间结果,减少大规模组合表达式、深层 if-else 嵌套和
|
||||
宽位宽组合比较链。
|
||||
- 对除法、取模、可变长度移位、优先级编码等潜在长路径逻辑,应采用查表、分级
|
||||
计算或多周期流水方式实现。
|
||||
- `NEAR>0` 误差量化不得使用单周期大组合除法;优先采用倒数查表、乘法、商校正
|
||||
和寄存器切分的流水实现。若采用多周期计算,必须在验证中覆盖 `NEAR=31` 等
|
||||
最大值路径。
|
||||
- 与 `NEAR` 相关的 DSP 乘法关键路径必须在输入操作数侧和乘积输出侧按需要加入
|
||||
触发器流水,特别是 `Errval*(2*NEAR+1)`、`RANGE*(2*NEAR+1)`、倒数查表乘法
|
||||
和商校正相关路径。流水拆分不得改变 JPEG-LS 标准码流语义,新增延迟必须由
|
||||
valid/ready 或状态机显式对齐。
|
||||
- regular mode 的重建像素 `Rx` 可在误差量化流水级计算完成并被接受后回写
|
||||
line history,不需要等待 `MErrval` 映射、Golomb 编码或 bit packer 完成;
|
||||
该优化只作用于重建历史反馈,不能改变 entropy 事件顺序或 context 更新顺序。
|
||||
- `NEAR>0` 行缓存反馈允许同一行 `Rx -> Ra` 同周期旁路,以减少逐像素反馈气泡;
|
||||
行尾换行、strip 边界和任何非连续坐标场景必须停顿等待状态提交完成。
|
||||
- 允许熵编码和 bit packer 对极端长码进行多周期处理,并通过暂停输入保护内部
|
||||
状态;该暂停周期计入吞吐统计。
|
||||
- run mode 已提交一个 run 段后,若后续像素仍为非 EOL 的 run 匹配像素且不会
|
||||
立即产生熵输出,允许继续接收并累加下一段 run length;遇到 regular 像素、
|
||||
run interruption 或 EOL run 段时,必须等待前一 run 段熵输出完成,防止码流
|
||||
顺序被重排。
|
||||
- 允许条带边界上下文重置插入暂停周期,暂停周期计入吞吐统计,并应尽量降低
|
||||
对 200 MPixel/s 指标的影响;若影响明显,应采用 epoch/tag、双表或其它流水化
|
||||
重置方案优化。
|
||||
- 当前 RTL 应优先采用 context written-bit/epoch/tag 惰性初始化等价实现,在条带
|
||||
开始锁存默认 `A/B/C/N` 并把未写 context 读作默认值,避免 365 个 regular
|
||||
context 逐项清表形成固定长暂停。
|
||||
- 允许使用多周期计算,但不得牺牲主时钟频率或 200 MPixel/s 连续输入吞吐目标;
|
||||
若多周期实现形成吞吐瓶颈,必须继续拆分逻辑深度并流水化。
|
||||
- 复杂条件判断必须拆分为多个流水节拍,禁止在单个 `always_comb` 或
|
||||
`always_ff` 块中形成长条件链。
|
||||
- 复杂条件分支优先使用 `case` 或分级状态机实现,避免深层 `if-else` 链。
|
||||
- `LSE` 默认参数、阈值参数和与 `PIX_WIDTH/NEAR` 相关的固定组合可使用常量表或
|
||||
ROM 方式实现,避免运行时复杂组合计算。
|
||||
- 允许增加固定流水延迟换取时序收敛;首字节延迟和单帧输出完成延迟不作为硬性
|
||||
指标。
|
||||
- 时序优化必须用 quick synthesis 或更完整的实现报告验证;若缓冲、扇出属性、
|
||||
控制复制等优化尝试导致 WNS/TNS 恶化,应回退并记录原因。时序收敛评审应先导出
|
||||
全部负 slack 路径,再按是否包含 DSP48 和逻辑级数分类;当 DSP 乘法器路径暂时
|
||||
难以继续优化时,先优化不含 DSP 且逻辑级数大于 1 的 ready/CE、行边界判断、
|
||||
输出槽满/空判断等控制路径,降低非 DSP 路径压力后再复查 DSP 路径是否仍主导
|
||||
WNS。对宽 DSP 乘法的进一步拆分必须继续用综合结果验证,已经证明会显著恶化
|
||||
WNS 的手动高/低半字拆分方案不应保留。
|
||||
- 各模块输出原则上采用寄存器输出,跨模块接口避免直接串接长组合逻辑。
|
||||
- 为降低复位扇出和时序压力,允许同步复位只覆盖状态机、valid 标志、计数器、
|
||||
FIFO 指针等控制状态;纯数据流水寄存器可不复位,但必须由 valid 信号保证无效
|
||||
数据不会参与新的图像压缩。
|
||||
|
||||
## 7. 性能指标
|
||||
|
||||
| 指标 | 目标值 | 说明 |
|
||||
| --- | ---: | --- |
|
||||
| 时钟频率 | 250 MHz | 综合后目标频率 |
|
||||
| 输入流水线峰值 | 1 像素/周期 | 输出缓冲可用时的峰值处理能力 |
|
||||
| 峰值输入吞吐 | 250 MPixel/s | 多帧连续输入场景下的目标峰值 |
|
||||
| 输出接口吞吐 | 1 byte/周期 | `ofifo_wdata[7:0]` 固定字节流 |
|
||||
| 首字节延迟 | 实现相关 | 受 header 输出和内部流水线影响 |
|
||||
| 单帧输出完成延迟 | 不作强约束 | 受压缩率、JPEG-LS marker/zero-bit stuffing 和输出接口限制 |
|
||||
|
||||
连续多帧输入吞吐测试定义为:连续 10 幅图像输入时,从第一幅图像第一个像素被
|
||||
读取开始,到第 10 幅图像最后一个像素被读取结束的平均输入吞吐率。
|
||||
在 250 MHz 时钟下,连续 10 帧测试的平均输入读取吞吐率应不低于
|
||||
200 MPixel/s。该硬性指标适用于 `ratio=1/2/3`、默认尺寸 `6144 x 256`、连续
|
||||
10 幅代表性图像的性能测试;`ratio=0` 无损模式只报告吞吐率,不作为
|
||||
200 MPixel/s 硬性约束。统计时排除上游 `ififo_empty=1` 导致的空等待周期;
|
||||
内部输出缓冲接近满、熵编码极端长码处理和条带边界重置导致的暂停周期计入吞吐统计。
|
||||
|
||||
### 7.1 带宽说明
|
||||
|
||||
16 bit 图像在 250 MHz、1 像素/周期输入时,原始输入数据率为 500 MByte/s。
|
||||
输出接口固定为每周期 1 byte,即 250 MByte/s。无损或低压缩率图像可能导致输出
|
||||
缓冲积压,因此端到端持续吞吐不承诺等于输入流水线峰值。
|
||||
|
||||
## 8. 编码规范
|
||||
|
||||
- 使用 SystemVerilog 实现。
|
||||
- 所有端口使用 `logic` 类型,不使用 `wire` 和 `reg` 端口类型。
|
||||
- 简单直连允许使用 `assign`;禁止在 `assign` 中实现两级及以上组合逻辑。
|
||||
- 组合逻辑采用 `always_comb`。
|
||||
- 时序逻辑采用 `always_ff`。
|
||||
- `always_ff` 中只能使用非阻塞赋值,禁止使用阻塞赋值。
|
||||
- `always_comb` 中使用阻塞赋值描述组合逻辑。
|
||||
- 禁止在 `always`、`always_comb`、`always_ff` 块内部定义局部变量;局部变量必须
|
||||
在过程块外声明。
|
||||
- 不使用 `task`。
|
||||
- 不使用复杂 `function`;仅允许简单、无状态、无全局变量依赖、不会形成长组合
|
||||
路径的函数。复杂算法逻辑必须拆分为模块或流水级。
|
||||
- 复杂条件分支优先使用 `case` 或状态机,避免深层 `if-else`。
|
||||
- 复杂条件判断必须拆分为多个节拍,不允许在一个过程块中完成过深判断链。
|
||||
- 统一时钟域,上升沿触发。
|
||||
- 所有模块采用同步复位,高电平有效。
|
||||
- 信号命名采用小写加下划线,例如 `pixel_data`。
|
||||
- 参数命名采用大写加下划线,例如 `PIX_WIDTH`。
|
||||
- 所有参数、关键内部变量和跨模块接口信号必须添加有意义的英文注释,说明物理
|
||||
含义、单位、合法范围和与 JPEG-LS 标准的关系,避免只重复变量名。
|
||||
- 与 JPEG-LS 标准伪代码重合的变量,应尽量沿用标准中的名称或可直接对应的名称,
|
||||
例如 `A`、`B`、`C`、`N`、`Nn`、`Ra`、`Rb`、`Rc`、`Rd`、`Px`、`Errval`、
|
||||
`MErrval`、`k`、`RUNindex`、`RUNval`、`RItype`、`EMErrval` 等;若因工程命名
|
||||
规范需要改写大小写或加前缀,必须在注释中标明对应的标准变量。
|
||||
- 对复杂数据结构和处理流程必须在代码或配套文档中给出示例说明,例如 context
|
||||
表项含义、line buffer 像素邻域、near-lossless 重建像素更新、Golomb code
|
||||
bitstream 打包、条带边界 flush、动态 `NEAR` 更新等。
|
||||
- 每个关键处理过程必须添加标准可追溯注释,标明标准名称、章节、图、表和伪代码
|
||||
或代码片段来源。若该处理过程没有对应图或表,应明确写 `Figure: N/A` 或
|
||||
`Table: N/A`;禁止凭记忆编造图号、表号或章节号。
|
||||
- 状态命名采用 `st_module_state` 风格。
|
||||
- 一个模块一个文件,文件名与模块名一致。
|
||||
- 顶层仅包含子模块例化。
|
||||
- 单模块不超过 1000 行。
|
||||
- 单文件注释率不低于 20%。
|
||||
- 代码注释使用英文。
|
||||
- RTL 设计文件不得包含 `ifdef SYNTHESIS`、`ifndef SYNTHESIS`、`translate_off/on`
|
||||
等导致仿真路径和综合路径行为不一致的分支;不得在 RTL 设计文件中放置仿真专用
|
||||
功能逻辑、断言、错误判定或影响验证 PASS/FAIL 的检查。调试打印若确需保留,
|
||||
必须先评审确认,且不得改变 RTL 行为或作为验证通过依据。默认情况下,所有此类
|
||||
检查必须放在 testbench、monitor、scoreboard 或验证脚本中。
|
||||
- 其它规则参见 `fpga/srs/SystemVerilog coding style.md`。
|
||||
|
||||
### 8.1 标准可追溯注释规范
|
||||
|
||||
关键模块文件头或关键过程块前必须包含如下格式的英文注释:
|
||||
|
||||
```systemverilog
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.4 Prediction
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Standard prediction procedure, mapped to Px calculation
|
||||
// Example : See docs/jls_traceability.md, section "MED predictor example"
|
||||
```
|
||||
|
||||
标准引用约束:
|
||||
|
||||
- `Standard` 必须写完整标准名称:`ITU-T T.87 (06/1998) / ISO/IEC 14495-1
|
||||
JPEG-LS Baseline`。
|
||||
- `Clause` 必须填写官方标准中的精确章节、附录或小节号。
|
||||
- `Figure` 和 `Table` 必须填写官方标准中的精确编号;若无对应项,写 `N/A`。
|
||||
- `Pseudocode` 必须说明该 RTL 过程对应标准伪代码中的哪一段处理。
|
||||
- 复杂处理必须在配套文档中给出小规模输入示例、关键中间变量和输出结果。
|
||||
- 若实现逻辑与标准伪代码存在流水化、查表、旁路或多周期等结构差异,注释必须
|
||||
说明等价关系。
|
||||
- 不得在 RTL 注释中大段复制标准原文;只记录引用位置、变量对应关系和工程化
|
||||
说明。
|
||||
|
||||
处理过程与标准章节的初始对照如下,后续应在 RTL 前根据正式标准 PDF 补齐精确图
|
||||
号、表号和伪代码位置:
|
||||
|
||||
| 处理过程 | 主要 RTL 模块 | 标准章节 |
|
||||
| --- | --- | --- |
|
||||
| 编码总体流程 | `jpeg_ls_encoder_top`, `jls_scan_ctrl` | Clause 4.4, Annex A.8, Annex D.1-D.3 |
|
||||
| 单分量编码参数和压缩数据 | `jls_scan_ctrl`, `jls_header_writer` | Annex A.1 |
|
||||
| 初始化和约定 | `jls_scan_ctrl`, `jls_context_model` | Annex A.2 |
|
||||
| 上下文确定 | `jls_context_model` | Annex A.3, Annex G.1 |
|
||||
| MED 预测 | `jls_predictor` | Annex A.4 |
|
||||
| 预测误差编码 | `jls_golomb_encoder` | Annex A.5, Annex G.2 |
|
||||
| 上下文变量更新 | `jls_context_model` | Annex A.6 |
|
||||
| run mode 编码 | `jls_run_mode` | Annex A.7, Annex G.3 |
|
||||
| JPEG-LS 码流格式和 marker | `jls_header_writer`, `jls_bit_packer` | Annex C.1-C.4 |
|
||||
| scan 控制流程 | `jls_scan_ctrl`, `jls_header_writer` | Annex D.3 |
|
||||
| bitstream 输出示例 | `jls_bit_packer` | Annex H.2 |
|
||||
| 详细编码示例 | 多模块联合说明 | Annex H.3 |
|
||||
| 解码一致性验证 | 验证脚本、CharLS 和 libjpeg 参考库对比 | Annex F.1 |
|
||||
|
||||
## 9. 验证方案
|
||||
|
||||
### 9.1 工具与目录
|
||||
|
||||
- RTL 仿真使用 QuestaSim。
|
||||
- 仿真平台放在 `fpga/sim`。
|
||||
- 综合工程和脚本放在 `fpga/synthesis`。
|
||||
- CharLS 参考工具放在 `third_party/charls`。
|
||||
- jpeg.org 列出的 libjpeg 参考库放在 `third_party/libjpeg`,用于与 CharLS 共同
|
||||
做标准码流兼容性验证。该库来源记录为 jpeg.org JPEG-LS Software 页面列出的
|
||||
`thorfdbg/libjpeg` 项目,GitHub 地址为
|
||||
https://github.com/thorfdbg/libjpeg/tree/master。
|
||||
- CharLS-JS 浏览器页面可作为人工快速查看工具:
|
||||
https://chafey.github.io/charls-js/test/browser/index.html
|
||||
- 条带多 frame 兼容性实验工具放在 `tools/jls_compat`。参考解码对比脚本使用
|
||||
`tools/jls_compat/reference_decode_compare.py`,该脚本通过 CharLS 与
|
||||
jpeg.org/libjpeg 参考库对 RTL 输出 `.rtljls` 做解码和像素比较。
|
||||
|
||||
### 9.2 验证方法
|
||||
|
||||
1. 单元测试:各子模块独立验证。
|
||||
2. 集成测试:完整 encoder 生成 JPEG-LS 码流。
|
||||
3. 标准兼容性测试:使用 CharLS 和 jpeg.org/libjpeg 参考库解码 RTL 输出码流。
|
||||
4. 误差测试:比较参考库解码图像与输入图像。
|
||||
5. 压缩比测试:按实际 bit 数统计目标值与输出值。
|
||||
6. 性能测试:统计输入吞吐、输出吞吐、暂停周期和内部输出缓冲水位。
|
||||
|
||||
参考库验证策略:
|
||||
- 冒烟测试阶段使用小规模测试集合优先跑通 libjpeg 参考库解码验证,用于快速发现
|
||||
marker、header、bit stuffing、条带多 frame 拼接等基础兼容性问题。
|
||||
- 设计成熟后,完整回归测试必须同时使用 CharLS 和 libjpeg 参考库解码 RTL 输出。
|
||||
- 若 CharLS 与 libjpeg 对同一码流的解码结果或错误判断不一致,验证报告必须记录
|
||||
差异,并将该用例标记为兼容性待评审项;在评审关闭前不得作为通过项。
|
||||
|
||||
### 9.3 测试用例
|
||||
|
||||
- 8 bit 灰度图像,`ratio=0` 无损编码。
|
||||
- 10 bit 灰度图像,`ratio=1/2/3` 近损编码。
|
||||
- 12 bit 灰度图像,`ratio=1/2/3` 近损编码。
|
||||
- 14 bit 灰度图像,`ratio=1/2/3` 近损编码。
|
||||
- 16 bit 灰度图像,`ratio=0` 无损编码。
|
||||
- 默认尺寸 `6144 x 256` 图像。
|
||||
- 最大行宽 `6144` 图像。
|
||||
- 高度最大值 `4096` 的边界测试。
|
||||
- 最小合法尺寸 `16 x 16`。
|
||||
- 连续 10 幅图像输入吞吐测试。
|
||||
- 输入 FIFO 暂停测试:`ififo_empty` 和 `ififo_alempty` 拉高场景。
|
||||
- 输出缓冲接近满导致输入暂停测试。
|
||||
- 条带多 frame 码流 CharLS 解码兼容性测试。
|
||||
- 条带多 frame 码流 libjpeg 参考库解码兼容性测试。
|
||||
- 小尺寸连续多图像冒烟测试:在同一输入流中连续送入多个 SOF 图像,检查每幅
|
||||
原始图像的输出起始 sideband、`SOI...EOI` frame 数量和参考解码结果;该冒烟
|
||||
不替代正式连续 10 幅默认尺寸吞吐测试。
|
||||
- 动态 `NEAR>0` 条带多 frame 冒烟测试:使用非零 `ratio` 触发后续条带近损编码,
|
||||
参考库解码必须成功,像素比较按 near-lossless 允许误差范围统计并报告。
|
||||
- 200 MPixel/s 性能测试:默认尺寸 `6144 x 256`、至少 10 幅代表性灰度图像,
|
||||
覆盖平滑、渐变、噪声、边缘和纹理场景;`ratio=1/2/3` 三组均需测试,任一组
|
||||
不达标则该性能项 FAIL;具体图像清单在验证计划中维护。
|
||||
阶段性可执行入口为 `fpga/sim/run_jls_throughput_regression.ps1`,该脚本使用
|
||||
`tb_jpeg_ls_encoder_top_run_smoke` 的 `+CHECK_THROUGHPUT=1` 模式,并把统计结果
|
||||
写入 `tools/jls_compat/out/rtl_throughput_stats.csv`。完整回归阶段仍必须结合
|
||||
CharLS 和 jpeg.org/libjpeg 参考解码验证码流兼容性。
|
||||
|
||||
验证报告必须记录:
|
||||
|
||||
- 每幅图像的配置尺寸、实际生效尺寸、`PIX_WIDTH` 和 `ratio`。
|
||||
- 每个条带 frame 的 `NEAR`、累计实际输出 bit 数、累计目标 bit 数。
|
||||
- 每幅图像的最终输出字节数、实际压缩比和目标压缩比偏差。
|
||||
- 每个条带 frame 的最大像素误差和每幅图像的最大像素误差。
|
||||
- 输入暂停周期数、输出缓冲最大水位。
|
||||
|
||||
仿真断言和检查项:
|
||||
|
||||
- 以下检查必须由 testbench、monitor、scoreboard 或验证脚本实现,不得依赖 RTL
|
||||
设计文件中的仿真专用分支;RTL 仿真和综合应使用同一套设计逻辑。
|
||||
- 若 `ofifo_full=1` 时 `ofifo_wr=1`,仿真报错。
|
||||
- 若内部输出缓冲 overflow,仿真报错。
|
||||
- 若连续 10 帧性能测试在排除上游 `ififo_empty=1` 周期后仍低于
|
||||
200 MPixel/s,仿真报错。
|
||||
- 若运行时尺寸配置非法,记录 warning,并回退默认尺寸。
|
||||
- 若完整回归阶段 CharLS 和 libjpeg 参考库解码结果不一致,仿真或验证脚本记录
|
||||
compatibility FAIL。
|
||||
- 仿真脚本不得只依赖仿真器进程退出码判断 PASS;必须同时检查日志中的
|
||||
`$fatal`/`** Fatal` 和非零 `Errors:` 汇总,避免 QuestaSim 在 `quit` 后返回 0
|
||||
时误报通过。
|
||||
|
||||
### 9.4 测试数据格式
|
||||
|
||||
- 输入测试数据使用 big-endian raw 像素文件。
|
||||
- `PIX_WIDTH=8` 时,每个像素按 1 byte 存储。
|
||||
- `PIX_WIDTH=10/12/14/16` 时,每个像素按 16 bit big-endian word 存储,高位
|
||||
补 0;压缩比目标仍按实际 `PIX_WIDTH` bit 计算。
|
||||
- 仿真脚本负责将 raw 输入文件转换为 FIFO word。
|
||||
- RTL 输入 FIFO 总线仍按 3.3 节定义,像素数值低位对齐,保留位为 0。
|
||||
- RTL 输出保存为 `.rtljls` 字节文件,用 CharLS 和 libjpeg 参考库解码验证。
|
||||
|
||||
### 9.5 固定测试图像集
|
||||
|
||||
- 固定测试图像必须作为仓库文件长期保存,回归和仿真默认直接读取这些文件,
|
||||
不允许在每次测试时重新生成随机图像。
|
||||
- 如需保留图像生成脚本,只能作为显式维护工具使用;测试入口不得隐式重建图像。
|
||||
- 固定测试图像使用标准二进制 PGM(`P5`)格式。
|
||||
- `16 bit` PGM 像素按 big-endian word 存储。
|
||||
- 当前固定测试图像均为无符号像素,命名中的 `s=0`;若后续扩展到有符号图像,
|
||||
则 `s=1`。
|
||||
- 固定测试图像命名规则为
|
||||
`<name>-w<w>-h<h>-s<s>-b<b>.pgm`,其中 `w` 为宽度像素数、`h` 为高度像素数、
|
||||
`s` 为符号标志(`0` 无符号,`1` 有符号)、`b` 为像素位宽。
|
||||
- CharLS 参考压缩码流命名规则为
|
||||
`<name>-w<w>-h<h>-s<s>-b<b>-r<r>.charlsjls`。
|
||||
- libjpeg 参考压缩码流命名规则为
|
||||
`<name>-w<w>-h<h>-s<s>-b<b>-r<r>.libjls`。
|
||||
- 上述命名中的 `r` 对应 RTL 的 `ratio` 配置值。
|
||||
- CharLS 参考压缩码流文件统一保存到 `img/reference/charls` 目录。
|
||||
- CharLS 参考压缩码流文件内容必须为纯 JPEG-LS interchange codestream,
|
||||
即从 `SOI` 开始到 `EOI` 结束,不允许在 `.charlsjls` 文件中保留 SPIFF
|
||||
包裹头;若第三方编码器默认输出 SPIFF 容器,参考生成脚本必须先剥离外层 SPIFF,
|
||||
再保存内部 `SOI..EOI` 码流。
|
||||
- libjpeg 参考压缩码流文件统一保存到 `img/reference/libjpeg` 目录。
|
||||
|
||||
### 9.6 扩展真实纹理图像
|
||||
|
||||
- `./img` 根目录中现有 4 幅原始图像作为真实纹理源,文件如下:
|
||||
- `omaha-w256-h256-b8-r0.img`
|
||||
- `sena-w256-h256-b8-r0.img`
|
||||
- `sensin-w256-h256-b8-r0.img`
|
||||
- `sinan-w256-h256-b8-r0.img`
|
||||
- 上述文件均为 `256 x 256 x 8 bit` raw 图像,每个文件大小固定为 `65536` byte。
|
||||
- 这 4 个 `.img` 文件仅作为维护性重建输入保留,不作为回归测试时直接读取的固定
|
||||
PGM 图像。
|
||||
- 第一版固定测试集中,需要将这 4 幅图像扩展为 `6144 x 256 x 16 bit` 标准 PGM。
|
||||
- 扩展方法为沿宽度方向重复复制原始 `256 x 256` 图像,直到覆盖完整的 `6144`
|
||||
列;如果复制后超过目标宽度,则裁剪多余列。
|
||||
- 扩展过程中保持原始行序和列序不变,不做插值、不做镜像、不做重排。
|
||||
- 原始 `8 bit` 像素值按线性比例扩展到目标 `16 bit` 无符号像素范围;
|
||||
`0x00 -> 0x0000`,`0xFF -> 0xFFFF`,即目标像素值等于 `raw8 * 257`
|
||||
(等价于 `(raw8 << 8) | raw8`),而不是仅放入低 `8 bit`。
|
||||
- 之前“低 `8 bit` 直写、高 `8 bit` 清零”的扩展方式已废止,不得继续用于真实纹理图像生成、
|
||||
参考码流生成或回归验证。
|
||||
- 扩展后的图像保存到 `img/patterns` 目录,作为固定测试图像集的一部分。
|
||||
- 扩展图像命名为 `*-w6144-h256-s0-b16.pgm`。
|
||||
|
||||
### 9.7 人工图案测试图像
|
||||
|
||||
- 人工图案图像保存到 `img/patterns` 目录。
|
||||
- 上述 4 幅真实纹理扩展图也放在 `img/patterns` 目录,与人工图案统一管理。
|
||||
- 第一版人工图案统一采用 `6144 x 256 x 16 bit`、无符号、标准 PGM。
|
||||
- 条带类和周期类图案默认空间周期为 `256` 像素。
|
||||
- 棋盘图案的单元尺寸固定为 `32 x 32` 像素。
|
||||
- 噪声图像必须使用固定随机种子,保证每次维护性重建时图像内容完全一致;
|
||||
第一版使用种子 `1`。
|
||||
- 点目标图像使用黑色背景,仅在四个角点和几何中心放置 `1x1` 白点。
|
||||
- 几何中心坐标定义为 `(3072, 128)`。
|
||||
- 需要固化以下人工图案:
|
||||
- `horizontal_stripes`:横向条带,像素值随 `y` 方向在 `0..65535` 间均匀变化。
|
||||
- `vertical_stripes`:纵向条带,像素值随 `x` 方向在 `0..65535` 间均匀变化。
|
||||
- `sine_stripes`:正弦条带,沿主变化方向按正弦规律在 `0..65535` 间变化。
|
||||
- `sawtooth_stripes`:锯齿条带,沿主变化方向按锯齿规律在 `0..65535` 间变化。
|
||||
- `diagonal_stripes`:对角线条带,按 `x+y` 相位在 `0..65535` 间周期变化。
|
||||
- `anti_diagonal_stripes`:反对角线条带,按 `x-y` 相位在 `0..65535` 间周期变化。
|
||||
- `concentric_stripes`:同心圆条带,相对图像中心按半径周期变化。
|
||||
- `checkerboard`:棋盘图案,黑白两色分别取 `0` 和 `65535`。
|
||||
- `noise_uniform_seed1`:均匀噪声图像,像素值在 `0..65535` 间无符号均匀分布。
|
||||
- `point_targets`:黑底点目标图像,点位于四角和中心,点值为 `65535`。
|
||||
- `white`:纯白图像,所有像素值均为 `65535`。
|
||||
- `black`:纯黑图像,所有像素值均为 `0`。
|
||||
- `diagonal_gradient`:对角线渐变图像,按全图归一化坐标从 `0` 线性渐变到
|
||||
`65535`。
|
||||
|
||||
## 10. 综合实现
|
||||
|
||||
采用 Vivado 进行综合验证,保证 RTL 可综合,并报告资源使用情况和综合后 Fmax。
|
||||
综合相关文件放在 `fpga/synthesis`。
|
||||
|
||||
目标 FPGA part 确认为 `xc7vx690tffg1761-2`。
|
||||
|
||||
Vivado quick synthesis 脚本需要保留在工程中,但在所有 RTL 模块完整实现前不作为
|
||||
自动推进步骤运行,避免过长综合时间影响迭代效率;仅在用户明确要求或完整模块
|
||||
集成后运行,用于确认可综合性、资源利用率趋势和初步 250 MHz 时序风险。quick
|
||||
synthesis 报告不替代最终综合实现报告。
|
||||
quick synthesis 脚本必须读取 `fpga/verilog/jpeg_ls_rtl.f` 中的完整 RTL 编译清单,
|
||||
不得使用早期手写子集产生顶层综合结论。
|
||||
|
||||
第一版暂不设置 LUT、FF、BRAM、DSP 的硬性资源上限;资源占用以 Vivado 综合报告
|
||||
为准,并在评审时结合 `OUT_BUF_BYTES` 等参数进行调整。
|
||||
|
||||
本轮 quick synthesis 阶段性结果:目标 part 为 `xc7vx690tffg1761-2`,
|
||||
约束周期 4.000 ns,`jpeg_ls_encoder_top` WNS 为 -0.615 ns,TNS 为 -175.754 ns,
|
||||
失败端点 537 个;按 4.615 ns 近似估算,OOC 综合层面的等效频率约为
|
||||
216.7 MHz,该结果不替代实现后 Fmax。资源使用为 LUT 22895、寄存器 6308、
|
||||
BRAM tile 3.5、DSP 14。最差路径仍为 `context_update_i/s1_near_scale_reg[6]`
|
||||
到 `context_update_i/s2_B_delta_reg/PCIN[*]` 的 DSP48E1 PCIN 路径,数据路径延迟
|
||||
3.557 ns,其中逻辑约占 86.889%、布线约占 13.111%。全量负 slack 路径已导出到
|
||||
`fpga/synthesis/quick_synth_reports/jpeg_ls_encoder_top_timing_violations_all.rpt`,
|
||||
分类 CSV 已导出到同目录;当前 537 条负 slack 路径中,260 条包含 DSP48,277 条为
|
||||
不含 DSP 且逻辑级数大于 1 的路径。已保留的非 DSP 优化包括:`jls_context_model`
|
||||
结果二级槽写使能去除下游 ready 依赖,`jls_scan_ctrl` 提前注册
|
||||
`enc_row_last_pixel` 以切断 `strip_width` 到 `jls_neighbor_provider.Rd` 的行尾判断路径,
|
||||
以及 `jls_regular_error_quantizer` 在 `STATE_IDLE` 接收下一像素、到 `STATE_FINISH`
|
||||
再等待输出槽的 ready 解耦。手动把 `jls_context_update` 的 33x8 乘法拆成高/低半字
|
||||
partial product 的试验曾导致 WNS 恶化到 -1.468 ns,已回退。
|
||||
|
||||
## 11. 实现计划
|
||||
|
||||
详细长期工作计划见 `docs/jls_work_plan.md`。本文档中的阶段和里程碑为需求级约束,
|
||||
`docs/jls_work_plan.md` 用于维护执行顺序、当前状态、阻塞项和自动推进策略。
|
||||
|
||||
| 阶段 | 内容 | 里程碑 |
|
||||
| --- | --- | --- |
|
||||
| 1 | 需求评审与 CharLS/libjpeg 兼容性实验 | 确认条带多 frame 方案 |
|
||||
| 2 | 架构设计与模块接口细化 | RTL 模块接口冻结 |
|
||||
| 3 | 核心模块 RTL 实现 | 各模块单元测试通过 |
|
||||
| 4 | 顶层集成与码流验证 | CharLS 和 libjpeg 参考库可解码输出码流 |
|
||||
| 5 | 动态 `NEAR` 调优与性能测试 | 压缩比统计和误差报告完成 |
|
||||
| 6 | 综合与时序优化 | 250 MHz 目标评估完成 |
|
||||
|
||||
## 12. 风险与对策
|
||||
|
||||
| 风险 | 影响 | 对策 |
|
||||
| --- | --- | --- |
|
||||
| 单 frame 多 scan 条带标准兼容性不足 | CharLS 或其它标准解码器不能解码 | 已改为每条带独立 JPEG-LS frame;保留兼容性探针防止回退 |
|
||||
| 输出接口带宽不足 | 内部输出缓冲满,输入被暂停 | 明确端到端吞吐受输出限制;输出缓冲参数化 |
|
||||
| 250 MHz 时序不收敛 | 达不到 200 MPixel/s 连续输入吞吐 | 高流水线实现、拆分长组合路径、寄存器切分上下文表和 bit packer 路径 |
|
||||
| 动态 `NEAR` 控制效果不足 | 压缩比偏差超过预期 | 先实现简单步进,再参考专利优化累计偏差控制 |
|
||||
| LSE/header 生成复杂 | 增加控制状态和验证工作 | 独立 `jls_header_writer` 模块并使用 CharLS 与 libjpeg 参考库做码流验证 |
|
||||
| run mode 实现错误 | 压缩效率下降或码流错误 | 单独建立 run mode 单元测试和边界样例 |
|
||||
|
||||
## 13. 交付物
|
||||
|
||||
1. SystemVerilog encoder RTL 代码。
|
||||
2. QuestaSim 测试平台和测试用例。
|
||||
3. CharLS 与 libjpeg 参考库兼容性实验和参考验证脚本。
|
||||
4. Vivado 综合脚本和约束文件。
|
||||
5. 验证报告。
|
||||
6. 使用文档。
|
||||
7. 标准可追溯说明文档:`docs/jls_traceability.md`,用于记录关键处理过程对应
|
||||
的标准名称、章节、图、表、伪代码位置、RTL 代码片段、变量对照和示例说明。
|
||||
8. 长期工作计划与验证计划:`docs/jls_work_plan.md`、`docs/jls_verification_plan.md`。
|
||||
9. Mermaid 算法流水流程图:`docs/jls_pipeline_mermaid.md`,用于记录当前 RTL
|
||||
实现的流水环节、输入输出、处理内容以及标准章节/伪代码对应关系。
|
||||
|
||||
---
|
||||
|
||||
文档版本:V1.1
|
||||
|
||||
更新日期:2026-04-15
|
||||
|
||||
目标芯片:Xilinx Virtex-7 XC7VX690T(`xc7vx690tffg1761-2`)
|
||||
|
||||
目标频率:250 MHz
|
||||
|
||||
峰值输入吞吐:250 MPixel/s
|
||||
294
fpga/srs/jpeg_ls_design.drawio
Normal file
294
fpga/srs/jpeg_ls_design.drawio
Normal file
@@ -0,0 +1,294 @@
|
||||
<mxfile host="app.diagrams.net" modified="2026-04-13T00:00:00.000Z" agent="Codex" version="24.7.17" type="device">
|
||||
<diagram id="jpeg-ls-architecture" name="软件框图">
|
||||
<mxGraphModel dx="1800" dy="1100" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1800" pageHeight="1100" math="0" shadow="0">
|
||||
<root>
|
||||
<mxCell id="0"/>
|
||||
<mxCell id="1" parent="0"/>
|
||||
<mxCell id="title_arch" value="JPEG-LS FPGA Encoder 软件框图" style="text;html=1;strokeColor=none;fillColor=none;fontSize=24;fontStyle=1;align=center;verticalAlign=middle;" vertex="1" parent="1">
|
||||
<mxGeometry x="500" y="30" width="800" height="40" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="cfg" value="运行时配置<br>cfg_pic_col/cfg_pic_row<br>ratio<br>SOF 时采样" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="60" y="90" width="210" height="90" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="ififo" value="输入 FIFO<br>ceil(PIX_WIDTH/8)*9<br>SOF + 像素低位对齐<br>同步读:rd 后下一拍有效" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="60" y="240" width="230" height="100" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="input_ctrl" value="jls_input_ctrl<br>SOF 检测<br>尺寸/ratio 锁存<br>empty/alempty 读控制<br>输出缓冲满时暂停输入" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="350" y="230" width="240" height="120" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="scan_ctrl" value="jls_scan_ctrl<br>行列计数<br>SCAN_ROWS 条带划分<br>scan 边界控制<br>上下文重置触发" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="660" y="230" width="220" height="120" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="header" value="jls_header_writer<br>SOI/SOF55/LSE/SOS/EOI<br>每 scan 写 NEAR<br>LSE 策略可按兼容性调整" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#f8cecc;strokeColor=#b85450;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="960" y="110" width="230" height="110" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="lse_table" value="LSE/阈值常量表<br>PIX_WIDTH + NEAR<br>默认 MAXVAL/T1/T2/T3/RESET" style="shape=cylinder3d;whiteSpace=wrap;html=1;boundedLbl=1;backgroundOutline=1;size=15;fillColor=#f8cecc;strokeColor=#b85450;fontSize=13;" vertex="1" parent="1">
|
||||
<mxGeometry x="1220" y="110" width="230" height="110" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="line_buf" value="line buffer / 重建像素缓存<br>存编码端重建值<br>支持 Ra/Rb/Rc/Rd 访问<br>BRAM/LUTRAM/SRL 混合" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#e1d5e7;strokeColor=#9673a6;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="660" y="420" width="250" height="120" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="predictor" value="jls_predictor<br>MED 预测<br>边界像素处理<br>流水寄存器切分" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="360" y="450" width="220" height="100" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="mode" value="模式判定<br>regular / run mode<br>使用 case / 状态机<br>复杂条件拆多拍" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="360" y="610" width="220" height="110" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="context" value="jls_context_model<br>A/B/C/N 统计表<br>bypass/forwarding<br>禁止读旧 context" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="660" y="610" width="250" height="110" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="runmode" value="jls_run_mode<br>run 检测<br>run 长度编码<br>中断样本处理" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="660" y="760" width="250" height="100" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="golomb" value="jls_golomb_encoder<br>误差映射<br>Golomb-Rice 编码<br>极端长码可多周期" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="980" y="610" width="230" height="110" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="packer" value="jls_bit_packer<br>bit 打包<br>字节边界补齐<br>marker/zero-bit stuffing" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="1280" y="610" width="240" height="110" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="near" value="jls_near_ctrl<br>scan 级 NEAR<br>累计目标 bit / 实际 bit<br>0..31 钳位" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="980" y="380" width="230" height="110" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="outbuf" value="jls_output_buffer<br>OUT_BUF_BYTES=8192<br>AFULL_MARGIN=256 需估算<br>只随字节流保留 ofifo[8]" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="1280" y="380" width="240" height="120" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="ofifo" value="输出 FIFO<br>ofifo_wdata[8:0]<br>[8]=原图 SOI 开始标志<br>[7:0]=标准 JPEG-LS 字节流" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="1560" y="400" width="220" height="120" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="verify" value="验证参考<br>CharLS / CharLS-JS<br>解码 .jls<br>误差和压缩比报告" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#e1d5e7;strokeColor=#9673a6;fontSize=14;arcSize=8;dashed=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="1560" y="610" width="220" height="110" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="note1" value="高主频约束<br>250 MHz 主时钟<br>ratio=1/2/3 需达 200 MPixel/s<br>复杂逻辑拆多拍,不牺牲主频" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=13;" vertex="1" parent="1">
|
||||
<mxGeometry x="60" y="760" width="240" height="120" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="note2" value="复位策略<br>控制状态必须复位<br>纯数据流水寄存器可不复位<br>valid 保护无效数据" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=13;" vertex="1" parent="1">
|
||||
<mxGeometry x="60" y="610" width="240" height="110" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_cfg_in" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="cfg" target="input_ctrl">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_ififo_in" value="pixel stream" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="ififo" target="input_ctrl">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_input_scan" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="input_ctrl" target="scan_ctrl">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_scan_header" value="frame/scan control" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="scan_ctrl" target="header">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_lse_header" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="lse_table" target="header">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_header_out" value="markers" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="header" target="outbuf">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_scan_pred" value="pixels + coords" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="scan_ctrl" target="predictor">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_line_pred" value="Ra/Rb/Rc/Rd" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="line_buf" target="predictor">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_pred_mode" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="predictor" target="mode">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_mode_context" value="regular" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="mode" target="context">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_mode_run" value="run" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="mode" target="runmode">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_context_golomb" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="context" target="golomb">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_run_golomb" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="runmode" target="golomb">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_golomb_packer" value="variable bits" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="golomb" target="packer">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_packer_outbuf" value="bytes" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="packer" target="outbuf">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_outbuf_ofifo" value="9 bit" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="outbuf" target="ofifo">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_outbuf_near" value="byte count" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="outbuf" target="near">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_scan_near" value="scan done" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="scan_ctrl" target="near">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_near_header" value="NEAR" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="near" target="header">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_near_context" value="NEAR" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="near" target="context">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_recon_line" value="reconstructed pixels" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="golomb" target="line_buf">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="e_ofifo_verify" value=".jls" style="endArrow=block;html=1;rounded=0;dashed=1;" edge="1" parent="1" source="ofifo" target="verify">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
</root>
|
||||
</mxGraphModel>
|
||||
</diagram>
|
||||
<diagram id="jpeg-ls-workflow" name="工作流程">
|
||||
<mxGraphModel dx="1600" dy="1400" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1600" pageHeight="1400" math="0" shadow="0">
|
||||
<root>
|
||||
<mxCell id="0"/>
|
||||
<mxCell id="1" parent="0"/>
|
||||
<mxCell id="title_flow" value="JPEG-LS Encoder 工作流程" style="text;html=1;strokeColor=none;fillColor=none;fontSize=24;fontStyle=1;align=center;verticalAlign=middle;" vertex="1" parent="1">
|
||||
<mxGeometry x="400" y="30" width="800" height="40" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_reset" value="复位 / idle<br>控制状态复位<br>数据流水由 valid 保护" style="ellipse;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;fontSize=14;" vertex="1" parent="1">
|
||||
<mxGeometry x="80" y="100" width="200" height="80" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_wait" value="等待输入 SOF=1<br>SOF=0 时继续等待" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="360" y="100" width="220" height="80" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_sample" value="采样配置<br>cfg_pic_col/cfg_pic_row/ratio<br>配置帧内保持不变" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="660" y="100" width="240" height="90" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_valid" value="尺寸是否合法?" style="rhombus;whiteSpace=wrap;html=1;fillColor=#f8cecc;strokeColor=#b85450;fontSize=14;" vertex="1" parent="1">
|
||||
<mxGeometry x="990" y="95" width="170" height="100" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_default" value="使用默认尺寸<br>6144 x 256<br>记录 warning" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#f8cecc;strokeColor=#b85450;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="1240" y="80" width="220" height="80" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_init" value="初始化帧状态<br>active 尺寸<br>NEAR=0<br>scan_index=0" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="660" y="250" width="240" height="90" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_header" value="输出帧/scan 头<br>SOI/SOF55<br>LSE 默认参数<br>SOS 写当前 NEAR" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#f8cecc;strokeColor=#b85450;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="360" y="390" width="240" height="110" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_scan_reset" value="scan 开始<br>重置上下文/行缓存边界<br>必要暂停计入吞吐" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#e1d5e7;strokeColor=#9673a6;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="660" y="390" width="250" height="100" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_read_ready" value="可读输入?<br>empty=0<br>输出缓冲有余量" style="rhombus;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;fontSize=14;" vertex="1" parent="1">
|
||||
<mxGeometry x="1000" y="385" width="190" height="110" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_pause" value="暂停 ififo_rd<br>等待 empty 解除或缓冲释放<br>内部暂停计入吞吐" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#f8cecc;strokeColor=#b85450;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="1270" y="400" width="250" height="90" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_pipe" value="读取像素并进入流水<br>MED 预测<br>regular/run 判定<br>context bypass/forwarding" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="360" y="570" width="260" height="110" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_entropy" value="熵编码流水<br>误差映射<br>Golomb-Rice<br>极端长码可多周期" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="700" y="570" width="240" height="110" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_pack" value="bit packer<br>字节打包<br>marker/zero-bit stuffing<br>写入内部输出缓冲" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="1030" y="570" width="260" height="110" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_update_recon" value="更新重建像素历史<br>line buffer 使用重建值<br>context 统计写回" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#e1d5e7;strokeColor=#9673a6;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="700" y="760" width="260" height="100" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_scan_done" value="当前 scan 完成?" style="rhombus;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;fontSize=14;" vertex="1" parent="1">
|
||||
<mxGeometry x="1040" y="760" width="190" height="100" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_align" value="scan 收尾<br>补齐到字节边界<br>统计实际输出 bit" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#f8cecc;strokeColor=#b85450;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="1280" y="750" width="230" height="100" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_ratio" value="ratio 是否为 0?" style="rhombus;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;fontSize=14;" vertex="1" parent="1">
|
||||
<mxGeometry x="760" y="940" width="180" height="100" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_near_fixed" value="NEAR 保持 0<br>无损模式<br>只报告吞吐" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="500" y="950" width="200" height="80" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_near_update" value="动态更新 NEAR<br>比较累计实际 bit 与目标 bit<br>钳位 0..31<br>下一 scan 生效" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="1010" y="930" width="260" height="110" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_more_scan" value="还有 scan?" style="rhombus;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;fontSize=14;" vertex="1" parent="1">
|
||||
<mxGeometry x="760" y="1120" width="180" height="100" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_eoi" value="输出 EOI<br>完成当前原始图像<br>输出流按帧顺序排队" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#f8cecc;strokeColor=#b85450;fontSize=14;arcSize=8;" vertex="1" parent="1">
|
||||
<mxGeometry x="1010" y="1130" width="230" height="90" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="f_next" value="前级不忙则可接收下一帧<br>ofifo[8] 在新原图 SOI 首字节置 1" style="ellipse;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;fontSize=14;" vertex="1" parent="1">
|
||||
<mxGeometry x="1290" y="1135" width="250" height="85" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_reset_wait" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_reset" target="f_wait">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_wait_sample" value="SOF=1" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_wait" target="f_sample">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_sample_valid" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_sample" target="f_valid">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_valid_default" value="否" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_valid" target="f_default">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_valid_init" value="是" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_valid" target="f_init">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_default_init" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_default" target="f_init">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_init_header" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_init" target="f_header">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_header_scan_reset" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_header" target="f_scan_reset">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_scan_read" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_scan_reset" target="f_read_ready">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_read_pause" value="否" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_read_ready" target="f_pause">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_pause_read" value="重试" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_pause" target="f_read_ready">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_read_pipe" value="是" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_read_ready" target="f_pipe">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_pipe_entropy" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_pipe" target="f_entropy">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_entropy_pack" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_entropy" target="f_pack">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_pack_update" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_pack" target="f_update_recon">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_update_done" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_update_recon" target="f_scan_done">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_not_done" value="否" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_scan_done" target="f_read_ready">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_done_align" value="是" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_scan_done" target="f_align">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_align_ratio" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_align" target="f_ratio">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_ratio_fixed" value="是" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_ratio" target="f_near_fixed">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_ratio_update" value="否" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_ratio" target="f_near_update">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_fixed_more" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_near_fixed" target="f_more_scan">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_update_more" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_near_update" target="f_more_scan">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_more_header" value="是" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_more_scan" target="f_header">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_more_eoi" value="否" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_more_scan" target="f_eoi">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_eoi_next" value="" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_eoi" target="f_next">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
<mxCell id="fe_next_wait" value="下一帧" style="endArrow=block;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1" source="f_next" target="f_wait">
|
||||
<mxGeometry relative="1" as="geometry"/>
|
||||
</mxCell>
|
||||
</root>
|
||||
</mxGraphModel>
|
||||
</diagram>
|
||||
</mxfile>
|
||||
139
fpga/synthesis/quick_synth.tcl
Normal file
139
fpga/synthesis/quick_synth.tcl
Normal file
@@ -0,0 +1,139 @@
|
||||
# Quick Vivado synthesis smoke for the JPEG-LS RTL.
|
||||
#
|
||||
# Usage:
|
||||
# vivado -mode batch -source fpga/synthesis/quick_synth.tcl
|
||||
# vivado -mode batch -source fpga/synthesis/quick_synth.tcl -tclargs -part xc7vx690tffg1761-2
|
||||
#
|
||||
# The target part can also be supplied with the JLS_FPGA_PART environment
|
||||
# variable. The default part is provisional and must not be treated as a final
|
||||
# project resource/timing target.
|
||||
|
||||
# Resolve all paths from this script's own location so the flow does not
# depend on the current working directory. Reports go into a
# quick_synth_reports directory next to the script.
set script_dir [file dirname [file normalize [info script]]]
|
||||
set repo_root [file normalize [file join $script_dir "../.."]]
|
||||
set report_dir [file normalize [file join $script_dir "quick_synth_reports"]]
|
||||
|
||||
# Defaults: target part, clock period in ns (4.000 ns is 250 MHz), and the
# top selection ("all" expands to the all_tops list further down).
set target_part "xc7vx690tffg1761-2"
|
||||
set clock_period_ns "4.000"
|
||||
set requested_top "all"
|
||||
|
||||
# A non-empty JLS_FPGA_PART environment variable replaces the default part.
# A -part argument parsed below still takes precedence over it.
if {[info exists ::env(JLS_FPGA_PART)] && $::env(JLS_FPGA_PART) ne ""} {
|
||||
set target_part $::env(JLS_FPGA_PART)
|
||||
}
|
||||
|
||||
# Parse -tclargs as strict key/value pairs. A key without a value or an
# unknown key prints an error and exits with status 1.
set arg_index 0
|
||||
while {$arg_index < [llength $argv]} {
|
||||
set arg_key [lindex $argv $arg_index]
|
||||
incr arg_index
|
||||
if {$arg_index >= [llength $argv]} {
|
||||
puts "ERROR: missing value for $arg_key"
|
||||
exit 1
|
||||
}
|
||||
|
||||
set arg_value [lindex $argv $arg_index]
|
||||
incr arg_index
|
||||
|
||||
# Supported keys: -part, -clock_period, -top.
switch -- $arg_key {
|
||||
"-part" {
|
||||
set target_part $arg_value
|
||||
}
|
||||
"-clock_period" {
|
||||
set clock_period_ns $arg_value
|
||||
}
|
||||
"-top" {
|
||||
set requested_top $arg_value
|
||||
}
|
||||
default {
|
||||
puts "ERROR: unknown argument $arg_key"
|
||||
exit 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Read the RTL file list: one source path per line. Lines are trimmed first;
# blank lines and lines whose first character is a hash sign are skipped.
# No other file-list syntax is interpreted here.
set rtl_filelist_path [file join $repo_root "fpga/verilog/jpeg_ls_rtl.f"]
|
||||
set rtl_files [list]
|
||||
set rtl_fd [open $rtl_filelist_path "r"]
|
||||
while {[gets $rtl_fd rtl_line] >= 0} {
|
||||
set rtl_line [string trim $rtl_line]
|
||||
if {$rtl_line eq ""} {
|
||||
continue
|
||||
}
|
||||
if {[string match "#*" $rtl_line]} {
|
||||
continue
|
||||
}
|
||||
|
||||
# Relative entries are resolved against repo_root; absolute entries are
# only normalized.
if {[file pathtype $rtl_line] eq "absolute"} {
|
||||
lappend rtl_files [file normalize $rtl_line]
|
||||
} else {
|
||||
lappend rtl_files [file normalize [file join $repo_root $rtl_line]]
|
||||
}
|
||||
}
|
||||
close $rtl_fd
|
||||
|
||||
# Modules synthesized as standalone tops when the requested top is "all".
set all_tops [list \
|
||||
"jpeg_ls_encoder_top" \
|
||||
"jls_input_ctrl" \
|
||||
"jls_preset_defaults" \
|
||||
"jls_header_writer" \
|
||||
"jls_near_ctrl" \
|
||||
]
|
||||
|
||||
# Any other -top value is synthesized alone; it is not checked against
# all_tops, so a misspelled name only fails later inside synth_design.
if {$requested_top eq "all"} {
|
||||
set top_list $all_tops
|
||||
} else {
|
||||
set top_list [list $requested_top]
|
||||
}
|
||||
|
||||
file mkdir $report_dir
|
||||
|
||||
# The summary CSV is opened in "w" mode, so each run replaces the previous
# summary. One row per synthesized top is appended by the loop below.
set summary_path [file join $report_dir "quick_synth_summary.csv"]
|
||||
set summary_fd [open $summary_path "w"]
|
||||
puts $summary_fd "top,part,clock_period_ns,wns_ns,clock_ports,report_dir"
|
||||
|
||||
# Echo the effective configuration into the Vivado log.
puts "INFO: JPEG-LS quick synthesis"
|
||||
puts "INFO: repo_root = $repo_root"
|
||||
puts "INFO: report_dir = $report_dir"
|
||||
puts "INFO: target_part = $target_part"
|
||||
puts "INFO: clock_period_ns = $clock_period_ns"
|
||||
puts "INFO: rtl_filelist = $rtl_filelist_path"
|
||||
puts "INFO: rtl_file_count = [llength $rtl_files]"
|
||||
puts "INFO: top_list = $top_list"
|
||||
|
||||
# Synthesize every selected top. Each iteration creates a fresh in-memory
# project, reads the full RTL list, and closes the project at the end.
foreach top_name $top_list {
|
||||
puts "INFO: quick synth top $top_name"
|
||||
|
||||
create_project -in_memory -part $target_part
|
||||
set_property target_language Verilog [current_project]
|
||||
|
||||
read_verilog -sv $rtl_files
|
||||
synth_design -top $top_name -part $target_part -mode out_of_context -flatten_hierarchy rebuilt
|
||||
|
||||
# The clock constraint is applied after synthesis and only when the top has
# a port named clk. Tops without such a port are reported unconstrained.
set clock_ports [get_ports -quiet clk]
|
||||
set clock_count [llength $clock_ports]
|
||||
if {$clock_count > 0} {
|
||||
create_clock -name clk -period $clock_period_ns $clock_ports
|
||||
}
|
||||
|
||||
# Per-top output files, all named after the top inside report_dir.
set util_path [file join $report_dir "${top_name}_utilization.rpt"]
|
||||
set timing_path [file join $report_dir "${top_name}_timing_summary.rpt"]
|
||||
set clock_path [file join $report_dir "${top_name}_clock_utilization.rpt"]
|
||||
set dcp_path [file join $report_dir "${top_name}.dcp"]
|
||||
|
||||
report_utilization -file $util_path
|
||||
report_timing_summary -file $timing_path -delay_type max -max_paths 10
|
||||
# The clock utilization report is skipped for tops without a clk port.
if {$clock_count > 0} {
|
||||
report_clock_utilization -file $clock_path
|
||||
}
|
||||
# The checkpoint is the input of report_timing_violations.tcl, which looks
# for a .dcp named after the top in the same report directory.
write_checkpoint -force $dcp_path
|
||||
|
||||
# WNS is taken as the SLACK of the single path returned by get_timing_paths.
# It stays "NA" when no timing path is returned.
set wns_value "NA"
|
||||
set timing_paths [get_timing_paths -quiet -max_paths 1]
|
||||
if {[llength $timing_paths] > 0} {
|
||||
set wns_value [get_property SLACK [lindex $timing_paths 0]]
|
||||
}
|
||||
|
||||
# Note: the clock_ports CSV column receives the number of matched clk ports,
# not the port names.
puts $summary_fd "$top_name,$target_part,$clock_period_ns,$wns_value,$clock_count,$report_dir"
|
||||
close_project
|
||||
}
|
||||
|
||||
close $summary_fd
|
||||
puts "INFO: quick synthesis summary: $summary_path"
|
||||
108
fpga/synthesis/report_timing_violations.tcl
Normal file
108
fpga/synthesis/report_timing_violations.tcl
Normal file
@@ -0,0 +1,108 @@
|
||||
# Detailed timing-violation extractor for JPEG-LS quick synthesis checkpoints.
|
||||
#
|
||||
# Usage:
|
||||
# vivado -mode batch -source fpga/synthesis/report_timing_violations.tcl
|
||||
# vivado -mode batch -source fpga/synthesis/report_timing_violations.tcl \
|
||||
# -tclargs -top jpeg_ls_encoder_top -max_paths 5000
|
||||
|
||||
# Resolve paths from this script's own location. report_dir is the same
# quick_synth_reports directory that quick_synth.tcl writes checkpoints to.
# repo_root is computed here but not referenced again in this script.
set script_dir [file dirname [file normalize [info script]]]
|
||||
set repo_root [file normalize [file join $script_dir "../.."]]
|
||||
set report_dir [file normalize [file join $script_dir "quick_synth_reports"]]
|
||||
|
||||
# Defaults: which checkpoint to open, the maximum number of paths to export,
# and the slack threshold in ns. Only paths with slack below the threshold
# are reported, so 0.000 selects violating paths.
set requested_top "jpeg_ls_encoder_top"
|
||||
set max_paths 5000
|
||||
set slack_limit 0.000
|
||||
|
||||
# Parse -tclargs as strict key/value pairs. A key without a value or an
# unknown key prints an error and exits with status 1.
set arg_index 0
|
||||
while {$arg_index < [llength $argv]} {
|
||||
set arg_key [lindex $argv $arg_index]
|
||||
incr arg_index
|
||||
if {$arg_index >= [llength $argv]} {
|
||||
puts "ERROR: missing value for $arg_key"
|
||||
exit 1
|
||||
}
|
||||
|
||||
set arg_value [lindex $argv $arg_index]
|
||||
incr arg_index
|
||||
|
||||
# Supported keys: -top, -max_paths, -slack_lesser_than.
switch -- $arg_key {
|
||||
"-top" {
|
||||
set requested_top $arg_value
|
||||
}
|
||||
"-max_paths" {
|
||||
set max_paths $arg_value
|
||||
}
|
||||
"-slack_lesser_than" {
|
||||
set slack_limit $arg_value
|
||||
}
|
||||
default {
|
||||
puts "ERROR: unknown argument $arg_key"
|
||||
exit 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# The checkpoint must already exist in report_dir under the name of the
# requested top; quick_synth.tcl writes it there. Abort with status 1 if not.
set dcp_path [file join $report_dir "${requested_top}.dcp"]
|
||||
if {![file exists $dcp_path]} {
|
||||
puts "ERROR: checkpoint not found: $dcp_path"
|
||||
exit 1
|
||||
}
|
||||
|
||||
open_checkpoint $dcp_path
|
||||
|
||||
# Output files: the full text report, a CSV of all exported paths, and a CSV
# restricted to paths without a DSP48 and with more than one logic level.
set detail_path [file join $report_dir "${requested_top}_timing_violations_all.rpt"]
|
||||
set csv_path [file join $report_dir "${requested_top}_timing_violations_summary.csv"]
|
||||
set non_dsp_csv_path [file join $report_dir "${requested_top}_timing_violations_non_dsp_logic_gt1.csv"]
|
||||
|
||||
# Text report of max-delay paths with slack below slack_limit, sorted by
# slack and capped at max_paths entries.
report_timing \
|
||||
-delay_type max \
|
||||
-slack_lesser_than $slack_limit \
|
||||
-max_paths $max_paths \
|
||||
-sort_by slack \
|
||||
-file $detail_path
|
||||
|
||||
# Same selection as the text report, returned as path objects so that each
# path can be exported as one CSV row.
set paths [get_timing_paths \
|
||||
-delay_type max \
|
||||
-slack_lesser_than $slack_limit \
|
||||
-max_paths $max_paths \
|
||||
-sort_by slack]
|
||||
|
||||
set csv_fd [open $csv_path "w"]
|
||||
set non_dsp_fd [open $non_dsp_csv_path "w"]
|
||||
puts $csv_fd "index,slack_ns,logic_levels,has_dsp,source,destination"
|
||||
puts $non_dsp_fd "index,slack_ns,logic_levels,source,destination"
|
||||
|
||||
# index numbers every exported path starting at 1. non_dsp_index only counts
# the rows written to the non-DSP CSV.
set index 0
|
||||
set non_dsp_index 0
|
||||
foreach path $paths {
|
||||
incr index
|
||||
|
||||
set slack [get_property SLACK $path]
|
||||
set source [get_property STARTPOINT_PIN $path]
|
||||
set destination [get_property ENDPOINT_PIN $path]
|
||||
set logic_levels [get_property LOGIC_LEVELS $path]
|
||||
|
||||
# DSP detection is textual: the report for this single path is searched for
# the substring DSP48.
set path_report [report_timing -of_objects $path -return_string]
|
||||
set has_dsp 0
|
||||
if {[string first "DSP48" $path_report] >= 0} {
|
||||
set has_dsp 1
|
||||
}
|
||||
|
||||
puts $csv_fd "$index,$slack,$logic_levels,$has_dsp,\"$source\",\"$destination\""
|
||||
|
||||
# The non-DSP CSV reuses index from the full CSV, so its rows can be matched
# against the full list; its index column is therefore not contiguous.
if {$has_dsp == 0 && $logic_levels > 1} {
|
||||
incr non_dsp_index
|
||||
puts $non_dsp_fd "$index,$slack,$logic_levels,\"$source\",\"$destination\""
|
||||
}
|
||||
}
|
||||
|
||||
close $csv_fd
|
||||
close $non_dsp_fd
|
||||
|
||||
puts "INFO: timing violation detail report: $detail_path"
|
||||
puts "INFO: timing violation CSV: $csv_path"
|
||||
puts "INFO: non-DSP logic-level>1 CSV: $non_dsp_csv_path"
|
||||
puts "INFO: violating path count exported: $index"
|
||||
puts "INFO: non-DSP logic-level>1 path count exported: $non_dsp_index"
|
||||
|
||||
close_design
|
||||
31
fpga/synthesis/run_quick_synth.ps1
Normal file
31
fpga/synthesis/run_quick_synth.ps1
Normal file
@@ -0,0 +1,31 @@
|
||||
param(
    # Vivado target FPGA part. JLS_FPGA_PART, when set, replaces the built-in default.
    [string] $Part = $(if ($env:JLS_FPGA_PART) { $env:JLS_FPGA_PART } else { "xc7vx690tffg1761-2" }),

    # Timing target for the main clk port, in ns. 4.000 ns corresponds to 250 MHz.
    [string] $ClockPeriod = "4.000",

    # RTL top module to synthesize. The default synthesizes all current standalone tops.
    [string] $Top = "all",

    # Vivado batch launcher. VIVADO_BAT, when set, replaces the built-in default.
    [string] $VivadoBat = $(if ($env:VIVADO_BAT) { $env:VIVADO_BAT } else { "C:\Xilinx\Vivado\2023.2\bin\vivado.bat" })
)

# Any Write-Error below terminates the script instead of continuing.
$ErrorActionPreference = "Stop"

if (!(Test-Path -LiteralPath $VivadoBat)) {
    Write-Error "Vivado launcher not found: $VivadoBat. Set VIVADO_BAT or pass -VivadoBat."
}

# quick_synth.tcl lives next to this script. Resolve it from the script's own
# directory so the flow works regardless of the caller's working directory.
# Forward slashes keep the path free of backslash sequences once it reaches Tcl.
# When $PSScriptRoot is unavailable, fall back to the repo-root-relative path.
$TclScript = "fpga/synthesis/quick_synth.tcl"
if ($PSScriptRoot) {
    $TclScript = (Join-Path $PSScriptRoot "quick_synth.tcl") -replace "\\", "/"
}

if (!(Test-Path -LiteralPath $TclScript)) {
    Write-Error "Synthesis script not found: $TclScript. Run from the repository root or keep quick_synth.tcl next to this script."
}

Write-Host "[jls-quick-synth] Vivado: $VivadoBat"
Write-Host "[jls-quick-synth] Part: $Part"
Write-Host "[jls-quick-synth] Clock period: $ClockPeriod ns"
Write-Host "[jls-quick-synth] Top: $Top"

& $VivadoBat -mode batch -source $TclScript -tclargs -part $Part -clock_period $ClockPeriod -top $Top
if ($LASTEXITCODE -ne 0) {
    # Propagate Vivado's exit code so callers and CI see the real failure.
    Write-Host "[jls-quick-synth] FAIL (Vivado exit code $LASTEXITCODE)"
    exit $LASTEXITCODE
}

Write-Host "[jls-quick-synth] PASS"
|
||||
121
fpga/synthesis/timing_priority_synth.tcl
Normal file
121
fpga/synthesis/timing_priority_synth.tcl
Normal file
@@ -0,0 +1,121 @@
|
||||
# JPEG-LS RTL synthesis flow with a timing-oriented Vivado directive.
#
# Invocation examples:
#   vivado -mode batch -source fpga/synthesis/timing_priority_synth.tcl
#   vivado -mode batch -source fpga/synthesis/timing_priority_synth.tcl -tclargs -top jpeg_ls_encoder_top
#   vivado -mode batch -source fpga/synthesis/timing_priority_synth.tcl -tclargs -directive PerformanceOptimized
#
# Accepted -tclargs options, each followed by exactly one value:
#   -part, -clock_period, -top, -directive
# A non-empty JLS_FPGA_PART environment variable replaces the default part; an
# explicit -part argument takes precedence over it.

# Paths are derived from this script's own location, not the working directory.
set script_dir [file dirname [file normalize [info script]]]
set repo_root  [file normalize [file join $script_dir "../.."]]
set report_dir [file normalize [file join $script_dir "timing_priority_reports"]]

# Built-in defaults.
set target_part     "xc7vx690tffg1761-2"
set clock_period_ns "4.000"
set requested_top   "jpeg_ls_encoder_top"
set synth_directive "PerformanceOptimized"

if {[info exists ::env(JLS_FPGA_PART)]} {
    if {$::env(JLS_FPGA_PART) ne ""} {
        set target_part $::env(JLS_FPGA_PART)
    }
}

# Command-line options and the script variable each one overwrites.
set option_targets {
    -part         target_part
    -clock_period clock_period_ns
    -top          requested_top
    -directive    synth_directive
}

# Arguments are consumed as key/value pairs. A key without a value is reported
# before the key itself is validated.
set arg_total [llength $argv]
for {set arg_pos 0} {$arg_pos < $arg_total} {incr arg_pos 2} {
    set option_name [lindex $argv $arg_pos]
    if {$arg_pos + 1 >= $arg_total} {
        puts "ERROR: missing value for $option_name"
        exit 1
    }
    if {![dict exists $option_targets $option_name]} {
        puts "ERROR: unknown argument $option_name"
        exit 1
    }
    set [dict get $option_targets $option_name] [lindex $argv [expr {$arg_pos + 1}]]
}

# Read the RTL file list: one path per line, blank lines and '#' comments skipped.
set rtl_filelist_path [file join $repo_root "fpga/verilog/jpeg_ls_rtl.f"]
set rtl_files [list]
set rtl_fd [open $rtl_filelist_path "r"]
while {[gets $rtl_fd raw_entry] >= 0} {
    set entry [string trim $raw_entry]
    if {$entry eq "" || [string index $entry 0] eq "#"} {
        continue
    }
    # file join discards $repo_root when $entry is absolute, so a single
    # expression covers both absolute and repo-relative entries.
    lappend rtl_files [file normalize [file join $repo_root $entry]]
}
close $rtl_fd

file mkdir $report_dir

# One-row CSV summary; the header is written now, the data row after synthesis.
set summary_path [file join $report_dir "timing_priority_summary.csv"]
set summary_fd [open $summary_path "w"]
puts $summary_fd "top,part,clock_period_ns,directive,wns_ns,clock_ports,report_dir"

puts "INFO: JPEG-LS timing-priority synthesis"
puts "INFO: repo_root = $repo_root"
puts "INFO: report_dir = $report_dir"
puts "INFO: target_part = $target_part"
puts "INFO: clock_period_ns = $clock_period_ns"
puts "INFO: synth_directive = $synth_directive"
puts "INFO: rtl_filelist = $rtl_filelist_path"
puts "INFO: rtl_file_count = [llength $rtl_files]"
puts "INFO: top = $requested_top"

# In-memory project: nothing is written to disk except the reports below.
create_project -in_memory -part $target_part
set_property target_language Verilog [current_project]

read_verilog -sv $rtl_files
synth_design -top $requested_top -part $target_part -mode out_of_context -flatten_hierarchy rebuilt -directive $synth_directive

# Constrain the clock only when the selected top actually has a 'clk' port.
set clock_ports [get_ports -quiet clk]
set clock_count [llength $clock_ports]
set has_clock [expr {$clock_count > 0}]
if {$has_clock} {
    create_clock -name clk -period $clock_period_ns $clock_ports
}

set util_path   [file join $report_dir "${requested_top}_utilization.rpt"]
set timing_path [file join $report_dir "${requested_top}_timing_summary.rpt"]
set clock_path  [file join $report_dir "${requested_top}_clock_utilization.rpt"]
set dcp_path    [file join $report_dir "${requested_top}.dcp"]

report_utilization -file $util_path
report_timing_summary -file $timing_path -delay_type max -max_paths 10
if {$has_clock} {
    report_clock_utilization -file $clock_path
}
write_checkpoint -force $dcp_path

# Slack of the single path returned by get_timing_paths, or "NA" when none exists.
set timing_paths [get_timing_paths -quiet -max_paths 1]
if {[llength $timing_paths] == 0} {
    set wns_value "NA"
} else {
    set wns_value [get_property SLACK [lindex $timing_paths 0]]
}

puts $summary_fd [join [list $requested_top $target_part $clock_period_ns $synth_directive $wns_value $clock_count $report_dir] ","]
close $summary_fd
puts "INFO: timing-priority synthesis summary: $summary_path"
|
||||
289
fpga/verilog/jls_bit_packer.sv
Normal file
289
fpga/verilog/jls_bit_packer.sv
Normal file
@@ -0,0 +1,289 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.1-C.4 entropy-coded segment syntax
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : JPEG-LS bitstream packing and marker/zero-bit stuffing
|
||||
// Trace : docs/jls_traceability.md#bit-packing-and-stuffing
|
||||
// Example : Data bits 0xFF followed by seven 1 bits produce bytes FF 7F.
|
||||
//
|
||||
// Bit packer for JPEG-LS scan payload bytes. The input code word is left
|
||||
// aligned: the first bit to write is code_bits[MAX_CODE_BITS-1]. After a data
|
||||
// byte equal to 0xFF is emitted, the packer inserts one stuffed zero bit before
|
||||
// the next data bit as required by JPEG-LS marker/zero-bit stuffing.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_bit_packer #(
  // Widest code word, in bits, that one code event may carry. The upstream
  // encoder must split longer Golomb codes into several ordered events.
  parameter int MAX_CODE_BITS = 64
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous active-high reset.
  input var logic rst,

  // A variable-length code event is being offered.
  input var logic code_valid,

  // The packer can take a new code event this cycle.
  output logic code_ready,

  // Left-aligned code bits; code_bits[MAX_CODE_BITS-1] is emitted first.
  input var logic [MAX_CODE_BITS-1:0] code_bits,

  // Number of valid bits in code_bits. A zero-length event is handshaked but
  // loads nothing.
  input var logic [6:0] code_bit_count,

  // Flush request issued before EOI; the open byte is padded with zero bits.
  input var logic flush_valid,

  // The packer can take a flush request this cycle.
  output logic flush_ready,

  // One-cycle pulse once the flush has fully drained.
  output logic flush_done,

  // A packed scan payload byte is available on byte_data.
  output logic byte_valid,

  // The downstream byte buffer can take byte_data.
  input var logic byte_ready,

  // Packed scan payload byte.
  output logic [7:0] byte_data
);

  // ---------------------------------------------------------------------
  // State registers
  // ---------------------------------------------------------------------
  logic [MAX_CODE_BITS-1:0] code_shift_reg;  // pending code bits, next bit at the MSB
  logic [6:0] bits_remaining;                // valid bits still held in code_shift_reg
  logic [7:0] partial_byte;                  // byte under construction, filled MSB first
  logic [3:0] partial_count;                 // bits already placed in partial_byte
  logic stuff_zero_pending;                  // a stuffed 0 bit is owed after an emitted 0xFF
  logic flush_active;                        // flush accepted and not yet finished

  // ---------------------------------------------------------------------
  // Handshake and step qualifiers
  // ---------------------------------------------------------------------
  logic output_slot_open;
  logic code_event_ready;
  logic accept_code;
  logic accept_flush;
  logic pack_step_active;

  // ---------------------------------------------------------------------
  // Next-state values produced by one pack step
  // ---------------------------------------------------------------------
  logic [MAX_CODE_BITS-1:0] work_shift_reg;
  logic [6:0] work_bits_remaining;
  logic [7:0] work_partial_byte;
  logic [3:0] work_partial_count;
  logic work_stuff_zero_pending;
  logic work_flush_active;
  logic work_emit_byte;
  logic [7:0] work_emit_data;
  logic work_flush_done;

  // Bit-slicing helpers, all derived from the registered state.
  logic [3:0] available_slots;
  logic [3:0] bits_to_take;
  logic [6:0] available_slots_ext;
  logic [7:0] code_top_byte;
  logic [7:0] take_mask;
  logic [7:0] insert_bits;

  // The output register can be rewritten when it is empty or being drained.
  assign output_slot_open = !byte_valid || byte_ready;

  // New code events are taken only between codes and outside a flush.
  assign code_event_ready = !flush_active && (bits_remaining == 7'd0) && output_slot_open;
  assign code_ready       = code_event_ready;

  // A flush additionally yields to a code event offered in the same cycle.
  assign flush_ready = !flush_active && (bits_remaining == 7'd0) && output_slot_open &&
                       !code_valid;

  // Zero-length code events complete the handshake without loading the shifter.
  assign accept_code  = code_valid && code_ready && (code_bit_count != 7'd0);
  assign accept_flush = flush_valid && flush_ready;

  // A pack step runs whenever there is work and somewhere to put a byte.
  assign pack_step_active = output_slot_open &&
                            ((bits_remaining != 7'd0) || flush_active || stuff_zero_pending);

  // Free bit positions left in the byte under construction.
  assign available_slots     = 4'd8 - partial_count;
  assign available_slots_ext = {3'd0, available_slots};

  // Take as many bits as fit: either everything left or just enough to
  // complete the current byte.
  always_comb begin
    if (bits_remaining == 7'd0) begin
      bits_to_take = 4'd0;
    end else if (bits_remaining >= available_slots_ext) begin
      bits_to_take = available_slots;
    end else begin
      bits_to_take = bits_remaining[3:0];
    end
  end

  // Next (up to) eight code bits, MSB first.
  assign code_top_byte = code_shift_reg[MAX_CODE_BITS-1 -: 8];

  // Mask keeping the top bits_to_take bits of code_top_byte.
  always_comb begin
    case (bits_to_take)
      4'd1:    take_mask = 8'h80;
      4'd2:    take_mask = 8'hC0;
      4'd3:    take_mask = 8'hE0;
      4'd4:    take_mask = 8'hF0;
      4'd5:    take_mask = 8'hF8;
      4'd6:    take_mask = 8'hFC;
      4'd7:    take_mask = 8'hFE;
      4'd8:    take_mask = 8'hFF;
      default: take_mask = 8'h00;
    endcase
  end

  // Selected bits moved down past the positions already filled.
  assign insert_bits = (code_top_byte & take_mask) >> partial_count[2:0];

  // One pack step. Priority: owed stuffed zero, then code bits, then flush
  // padding, then flush completion.
  always_comb begin
    work_shift_reg          = code_shift_reg;
    work_bits_remaining     = bits_remaining;
    work_partial_byte       = partial_byte;
    work_partial_count      = partial_count;
    work_stuff_zero_pending = stuff_zero_pending;
    work_flush_active       = flush_active;
    work_emit_byte          = 1'b0;
    work_emit_data          = 8'h00;
    work_flush_done         = 1'b0;

    if (pack_step_active) begin
      if (stuff_zero_pending) begin
        // The stuffed bit is a zero, so only the fill count advances.
        work_partial_count      = partial_count + 4'd1;
        work_stuff_zero_pending = 1'b0;

        if (work_partial_count == 4'd8) begin
          work_emit_byte     = 1'b1;
          work_emit_data     = work_partial_byte;
          work_partial_byte  = 8'h00;
          work_partial_count = 4'd0;
        end
      end else if (bits_remaining != 7'd0) begin
        // Merge the next slice of code bits into the open byte.
        work_partial_byte   = partial_byte | insert_bits;
        work_partial_count  = partial_count + bits_to_take;
        work_shift_reg      = code_shift_reg << bits_to_take;
        work_bits_remaining = bits_remaining - {3'd0, bits_to_take};

        if (work_partial_count == 4'd8) begin
          work_emit_byte = 1'b1;
          work_emit_data = work_partial_byte;
          // A completed 0xFF data byte owes one stuffed zero bit.
          if (work_partial_byte == 8'hFF) begin
            work_stuff_zero_pending = 1'b1;
          end
          work_partial_byte  = 8'h00;
          work_partial_count = 4'd0;
        end
      end else if (flush_active && (partial_count != 4'd0)) begin
        // Flush: emit the open byte; its unfilled low bits are already zero.
        work_emit_byte = 1'b1;
        work_emit_data = partial_byte;
        if (partial_byte == 8'hFF) begin
          work_stuff_zero_pending = 1'b1;
        end
        work_partial_byte  = 8'h00;
        work_partial_count = 4'd0;
      end else if (flush_active) begin
        // Nothing left to drain: the byte is empty and no stuffed bit is owed.
        work_flush_active = 1'b0;
        work_flush_done   = 1'b1;
      end
    end
  end

  // Register update. Later assignments win, so an accepted code or flush
  // overrides the values written by the pack step in the same cycle.
  always_ff @(posedge clk) begin
    if (rst) begin
      code_shift_reg     <= {MAX_CODE_BITS{1'b0}};
      bits_remaining     <= 7'd0;
      partial_byte       <= 8'h00;
      partial_count      <= 4'd0;
      stuff_zero_pending <= 1'b0;
      flush_active       <= 1'b0;
      flush_done         <= 1'b0;
      byte_valid         <= 1'b0;
      byte_data          <= 8'h00;
    end else begin
      // flush_done is a pulse: high only in the cycle after the final step.
      flush_done <= pack_step_active & work_flush_done;

      if (output_slot_open) begin
        byte_valid <= work_emit_byte;
        byte_data  <= work_emit_data;
      end

      if (pack_step_active) begin
        code_shift_reg     <= work_shift_reg;
        bits_remaining     <= work_bits_remaining;
        partial_byte       <= work_partial_byte;
        partial_count      <= work_partial_count;
        stuff_zero_pending <= work_stuff_zero_pending;
        flush_active       <= work_flush_active;
      end

      if (accept_code) begin
        code_shift_reg <= code_bits;
        bits_remaining <= code_bit_count;
      end

      if (accept_flush) begin
        flush_active <= 1'b1;
      end
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
104
fpga/verilog/jls_byte_arbiter.sv
Normal file
104
fpga/verilog/jls_byte_arbiter.sv
Normal file
@@ -0,0 +1,104 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.1-C.4 marker stream byte order
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Marker bytes and entropy-coded bytes in stream order
|
||||
// Trace : docs/jls_traceability.md#jls-header-markers
|
||||
// Example : Header byte FF is forwarded before a waiting payload byte.
|
||||
//
|
||||
// Two-input byte-stream arbiter. Header/EOI bytes have priority over payload
|
||||
// bytes so a strip frame is emitted as SOI/SOF/LSE/SOS, payload, and then EOI.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_byte_arbiter (
  // A header or EOI byte from jls_header_writer is being offered.
  input var logic header_valid,

  // High when the header byte is the one selected and downstream takes it.
  output logic header_ready,

  // Header byte and its original-image-start sideband.
  input var logic [7:0] header_data,
  input var logic header_original_image_start,

  // A scan payload byte from jls_bit_packer is being offered.
  input var logic payload_valid,

  // High when the payload byte is the one selected and downstream takes it.
  output logic payload_ready,

  // Payload byte. Payload never carries the original-image-start sideband.
  input var logic [7:0] payload_data,

  // Arbitrated byte event towards jls_output_buffer.
  output logic byte_valid,

  // jls_output_buffer takes the arbitrated byte this cycle.
  input var logic byte_ready,

  // Arbitrated byte and sideband.
  output logic [7:0] byte_data,
  output logic original_image_start
);

  // Fixed priority: the header stream wins whenever it is valid, and payload
  // is selected only when no header byte is waiting.
  logic select_header;
  logic select_payload;

  assign select_header  = header_valid;
  assign select_payload = payload_valid && !header_valid;

  assign byte_valid = select_header || select_payload;

  // Data mux. With neither source selected the outputs are driven to zero.
  always_comb begin
    if (select_header) begin
      byte_data            = header_data;
      original_image_start = header_original_image_start;
    end else if (select_payload) begin
      byte_data            = payload_data;
      original_image_start = 1'b0;
    end else begin
      byte_data            = 8'h00;
      original_image_start = 1'b0;
    end
  end

  // Only the selected source sees the downstream ready.
  assign header_ready  = select_header && byte_ready;
  assign payload_ready = select_payload && byte_ready;

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
251
fpga/verilog/jls_coding_params.sv
Normal file
251
fpga/verilog/jls_coding_params.sv
Normal file
@@ -0,0 +1,251 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.2 initialization, Annex G.2 variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : RANGE, qbpp, and LIMIT derivation from MAXVAL and NEAR
|
||||
// Trace : docs/jls_traceability.md#jls-coding-parameters
|
||||
// Example : PIX_WIDTH=8,NEAR=0 gives RANGE=256,qbpp=8,LIMIT=32.
|
||||
//
|
||||
// JPEG-LS coding parameter lookup. RANGE and qbpp depend on NEAR, but NEAR is
|
||||
// limited to 0..31 in this project. A lookup table avoids a synthesized
|
||||
// runtime divider and keeps this strip-level control path timing friendly.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_coding_params #(
  // Grayscale sample precision fixed at elaboration. Legal values: 8, 10, 12,
  // 14, 16. Any other value selects the 16-bit tables.
  parameter int PIX_WIDTH = 16
) (
  // JPEG-LS NEAR parameter for the current strip frame.
  input var logic [5:0] NEAR,

  // JPEG-LS RANGE parameter.
  output logic [16:0] RANGE,

  // JPEG-LS quantized bits per sample, ceil(log2(RANGE)).
  output logic [4:0] qbpp,

  // JPEG-LS LIMIT parameter used by regular-mode Golomb coding.
  output logic [6:0] LIMIT
);

  // Precomputed RANGE constants, one table per sample precision, indexed by
  // NEAR = 0..31. Constant tables keep a runtime divider out of the design.
  localparam int RANGE_LUT_8 [0:31] = '{
    256, 86, 52, 38, 30, 25, 21, 18,
    16, 15, 14, 13, 12, 11, 10, 10,
    9, 9, 8, 8, 8, 7, 7, 7,
    7, 6, 6, 6, 6, 6, 6, 6
  };
  localparam int RANGE_LUT_10 [0:31] = '{
    1024, 342, 206, 148, 115, 94, 80, 70,
    62, 55, 50, 46, 42, 39, 37, 34,
    32, 31, 29, 28, 26, 25, 24, 23,
    22, 22, 21, 20, 19, 19, 18, 18
  };
  localparam int RANGE_LUT_12 [0:31] = '{
    4096, 1366, 820, 586, 456, 374, 316, 274,
    242, 217, 196, 180, 165, 153, 143, 134,
    126, 118, 112, 106, 101, 97, 92, 89,
    85, 82, 79, 76, 73, 71, 69, 66
  };
  localparam int RANGE_LUT_14 [0:31] = '{
    16384, 5462, 3278, 2342, 1822, 1491, 1262, 1094,
    965, 864, 782, 714, 657, 608, 566, 530,
    498, 470, 444, 422, 401, 382, 366, 350,
    336, 323, 311, 299, 289, 279, 270, 262
  };
  localparam int RANGE_LUT_16 [0:31] = '{
    65536, 21846, 13108, 9364, 7283, 5959, 5043, 4370,
    3856, 3451, 3122, 2851, 2623, 2429, 2261, 2116,
    1987, 1874, 1773, 1682, 1600, 1526, 1458, 1396,
    1339, 1286, 1238, 1193, 1151, 1112, 1076, 1042
  };

  // Matching qbpp constants for every RANGE entry above.
  localparam int QBPP_LUT_8 [0:31] = '{
    8, 7, 6, 6, 5, 5, 5, 5,
    4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3
  };
  localparam int QBPP_LUT_10 [0:31] = '{
    10, 9, 8, 8, 7, 7, 7, 7,
    6, 6, 6, 6, 6, 6, 6, 6,
    5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5
  };
  localparam int QBPP_LUT_12 [0:31] = '{
    12, 11, 10, 10, 9, 9, 9, 9,
    8, 8, 8, 8, 8, 8, 8, 8,
    7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7
  };
  localparam int QBPP_LUT_14 [0:31] = '{
    14, 13, 12, 12, 11, 11, 11, 11,
    10, 10, 10, 10, 10, 10, 10, 10,
    9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9
  };
  localparam int QBPP_LUT_16 [0:31] = '{
    16, 15, 14, 14, 13, 13, 13, 13,
    12, 12, 12, 12, 12, 12, 12, 12,
    11, 11, 11, 11, 11, 11, 11, 11,
    11, 11, 11, 11, 11, 11, 11, 11
  };

  // NEAR saturated to 31, kept as a defensive clamp behind the upstream limit.
  logic [5:0] near_clamped;
  logic [4:0] lut_index;
  logic [16:0] range_next;
  logic [4:0] qbpp_next;
  logic [6:0] limit_next;

  assign near_clamped = (NEAR > 6'd31) ? 6'd31 : NEAR;

  // After the clamp the value fits in five bits, so the slice is lossless.
  assign lut_index = near_clamped[4:0];

  // PIX_WIDTH is an elaboration-time constant, so only one arm survives
  // synthesis. LIMIT depends on the sample precision alone.
  always_comb begin
    case (PIX_WIDTH)
      8: begin
        limit_next = 7'd32;
        range_next = RANGE_LUT_8[lut_index][16:0];
        qbpp_next  = QBPP_LUT_8[lut_index][4:0];
      end

      10: begin
        limit_next = 7'd40;
        range_next = RANGE_LUT_10[lut_index][16:0];
        qbpp_next  = QBPP_LUT_10[lut_index][4:0];
      end

      12: begin
        limit_next = 7'd48;
        range_next = RANGE_LUT_12[lut_index][16:0];
        qbpp_next  = QBPP_LUT_12[lut_index][4:0];
      end

      14: begin
        limit_next = 7'd56;
        range_next = RANGE_LUT_14[lut_index][16:0];
        qbpp_next  = QBPP_LUT_14[lut_index][4:0];
      end

      default: begin
        limit_next = 7'd64;
        range_next = RANGE_LUT_16[lut_index][16:0];
        qbpp_next  = QBPP_LUT_16[lut_index][4:0];
      end
    endcase
  end

  assign RANGE = range_next;
  assign qbpp  = qbpp_next;
  assign LIMIT = limit_next;

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
51
fpga/verilog/jls_common_pkg.sv
Normal file
51
fpga/verilog/jls_common_pkg.sv
Normal file
@@ -0,0 +1,51 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.1, Annex C.1-C.4, Annex D.3
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Shared engineering constants for JPEG-LS strip-frame encoder
|
||||
// Example : See docs/jls_module_interfaces.md
|
||||
//
|
||||
// Shared package for the JPEG-LS RTL encoder. Keep this file limited to
|
||||
// simple constants and small type declarations; algorithmic logic belongs in
|
||||
// pipelined modules, not in package functions.
|
||||
|
||||
package jls_common_pkg;

  // -------------------------------------------------------------------
  // Field widths
  // -------------------------------------------------------------------

  // Width of cfg_pic_col/cfg_pic_row and of image coordinates.
  localparam int JLS_DIM_WIDTH   = 13;

  // Width of the runtime ratio input port.
  localparam int JLS_RATIO_WIDTH = 4;

  // Output FIFO word: one data byte plus the original-image-start sideband bit.
  localparam int JLS_OFIFO_WIDTH = 9;

  // Width of the NEAR field. This IP clamps NEAR to 0..31.
  localparam int JLS_NEAR_WIDTH  = 6;

  // -------------------------------------------------------------------
  // Marker bytes
  // -------------------------------------------------------------------

  // First byte of every marker (SOI/SOF55/LSE/SOS/EOI).
  localparam logic [7:0] JLS_MARKER_PREFIX = 8'hFF;

  // Second marker byte, as emitted by the header writer.
  localparam logic [7:0] JLS_MARKER_SOI   = 8'hD8;
  localparam logic [7:0] JLS_MARKER_EOI   = 8'hD9;
  localparam logic [7:0] JLS_MARKER_SOF55 = 8'hF7;
  localparam logic [7:0] JLS_MARKER_LSE   = 8'hF8;
  localparam logic [7:0] JLS_MARKER_SOS   = 8'hDA;

  // -------------------------------------------------------------------
  // Enumerations
  // -------------------------------------------------------------------

  // Encodings of the runtime compression-ratio port defined by the SRS.
  typedef enum logic [JLS_RATIO_WIDTH-1:0] {
    JLS_RATIO_LOSSLESS = 4'd0,
    JLS_RATIO_1_TO_2   = 4'd1,
    JLS_RATIO_1_TO_4   = 4'd2,
    JLS_RATIO_1_TO_8   = 4'd3
  } jls_ratio_e;

  // Strip-frame level control events.
  typedef enum logic [1:0] {
    JLS_STRIP_EVENT_NONE   = 2'd0,
    JLS_STRIP_EVENT_START  = 2'd1,
    JLS_STRIP_EVENT_FINISH = 2'd2
  } jls_strip_event_e;

endpackage
|
||||
215
fpga/verilog/jls_context_memory.sv
Normal file
215
fpga/verilog/jls_context_memory.sv
Normal file
@@ -0,0 +1,215 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.2 initialization, Annex A.6 variables update
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Regular-mode context arrays A[0..364], B[0..364], C[0..364], N[0..364]
|
||||
// Trace : docs/jls_traceability.md#context-update
|
||||
// Example : RANGE=256 initializes A to max(2,(RANGE+32)/64)=4.
|
||||
//
|
||||
// Regular context storage. This module uses lazy strip initialization: strip
|
||||
// start clears a written-bit vector and latches the Annex A.2 default A value.
|
||||
// A later read of an unwritten context returns the default A/B/C/N tuple, while
|
||||
// a written context returns the RAM value. This is equivalent to writing all
|
||||
// 365 contexts at strip start, but avoids a long boundary stall.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_context_memory #(
  // Depth of the regular-mode context tables. A context is addressed by
  // abs((Q1*9+Q2)*9+Q3).
  parameter int CONTEXT_COUNT = 365
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous reset, active high.
  input var logic rst,

  // Request to (lazily) re-initialize all contexts for a new strip frame.
  input var logic init_valid,

  // High when an initialization request would be accepted this cycle.
  output logic init_ready,

  // JPEG-LS RANGE parameter from which the default A[Q] is derived.
  input var logic [16:0] init_RANGE,

  // Would flag a multi-cycle initializer; tied low because the lazy scheme
  // completes in the accept cycle.
  output logic init_busy,

  // Single-cycle pulse in the cycle after an initialization was accepted.
  output logic init_done,

  // Read request.
  input var logic read_valid,

  // High when a read request would be accepted this cycle.
  output logic read_ready,

  // Context to read.
  input var logic [8:0] read_context_index,

  // The registered read result below is valid.
  output logic read_result_valid,

  // Consumer takes the registered read result this cycle.
  input var logic read_result_ready,

  // Registered read result: context index and its A/B/C/N tuple.
  output logic [8:0] read_result_context_index,
  output logic [31:0] read_A,
  output logic signed [31:0] read_B,
  output logic signed [8:0] read_C,
  output logic [15:0] read_N,

  // Writeback request carrying an updated context.
  input var logic write_valid,

  // High when a writeback would be accepted this cycle.
  output logic write_ready,

  // Context to overwrite and its new A/B/C/N tuple.
  input var logic [8:0] write_context_index,
  input var logic [31:0] write_A,
  input var logic signed [31:0] write_B,
  input var logic signed [8:0] write_C,
  input var logic [15:0] write_N
);

  // Per-context variable storage.
  logic        [31:0] A_mem [0:CONTEXT_COUNT-1];
  logic signed [31:0] B_mem [0:CONTEXT_COUNT-1];
  logic signed [8:0]  C_mem [0:CONTEXT_COUNT-1];
  logic        [15:0] N_mem [0:CONTEXT_COUNT-1];

  // Lazy-initialization bookkeeping. A cleared bit in context_written means
  // the context has not been written since the last initialization, so reads
  // must return the default tuple instead of the RAM contents.
  logic [CONTEXT_COUNT-1:0] context_written;
  logic [31:0]              init_A_latched;

  // Default A value derived from init_RANGE: (RANGE + 32) / 64, floored at 2.
  logic [31:0] default_a_raw;
  logic [31:0] default_a;

  // Handshake completions for the three request channels.
  logic init_fire;
  logic result_slot_free;
  logic read_fire;
  logic write_fire;

  always_comb begin
    default_a_raw = ({15'd0, init_RANGE} + 32'd32) >> 6;

    default_a = default_a_raw;
    if (default_a_raw < 32'd2) begin
      default_a = 32'd2;
    end
  end

  always_comb begin
    // Initialization is only accepted while no read result is pending and no
    // writeback is being presented.
    init_ready = 1'b0;
    if (!(read_result_valid || write_valid)) begin
      init_ready = 1'b1;
    end

    init_fire = 1'b0;
    if (init_ready && init_valid) begin
      init_fire = 1'b1;
    end

    // The result register can be reloaded when it is empty or being drained.
    result_slot_free = 1'b0;
    if (read_result_ready || !read_result_valid) begin
      result_slot_free = 1'b1;
    end

    // Reads and writes are both held off in the cycle an init is accepted.
    read_ready = 1'b0;
    if (result_slot_free && !init_fire) begin
      read_ready = 1'b1;
    end

    read_fire = 1'b0;
    if (read_ready && read_valid) begin
      read_fire = 1'b1;
    end

    write_ready = 1'b1;
    if (init_fire) begin
      write_ready = 1'b0;
    end

    write_fire = 1'b0;
    if (write_ready && write_valid) begin
      write_fire = 1'b1;
    end
  end

  // No multi-cycle initializer exists in the lazy scheme.
  assign init_busy = 1'b0;

  // Initialization state and context RAM writes.
  always_ff @(posedge clk) begin
    if (rst) begin
      init_A_latched  <= '0;
      context_written <= '0;
      init_done       <= 1'b0;
    end else begin
      init_done <= 1'b0;

      if (init_fire) begin
        // Forget every previous write and remember this strip's default A.
        init_A_latched  <= default_a;
        context_written <= '0;
        init_done       <= 1'b1;
      end

      if (write_fire) begin
        A_mem[write_context_index]           <= write_A;
        B_mem[write_context_index]           <= write_B;
        C_mem[write_context_index]           <= write_C;
        N_mem[write_context_index]           <= write_N;
        context_written[write_context_index] <= 1'b1;
      end
    end
  end

  // Registered read result.
  always_ff @(posedge clk) begin
    if (rst) begin
      read_result_valid         <= 1'b0;
      read_result_context_index <= '0;
      read_A                    <= '0;
      read_B                    <= '0;
      read_C                    <= '0;
      read_N                    <= '0;
    end else begin
      // Result consumed and nothing new arriving: the register becomes empty.
      if (read_result_valid && read_result_ready && !read_fire) begin
        read_result_valid <= 1'b0;
      end

      if (read_fire) begin
        read_result_valid         <= 1'b1;
        read_result_context_index <= read_context_index;

        if (context_written[read_context_index]) begin
          read_A <= A_mem[read_context_index];
          read_B <= B_mem[read_context_index];
          read_C <= C_mem[read_context_index];
          read_N <= N_mem[read_context_index];
        end else begin
          // Untouched context: return the default tuple A=latched, B=0, C=0,
          // N=1.
          read_A <= init_A_latched;
          read_B <= 32'sd0;
          read_C <= 9'sd0;
          read_N <= 16'd1;
        end
      end
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
1027
fpga/verilog/jls_context_model.sv
Normal file
1027
fpga/verilog/jls_context_model.sv
Normal file
File diff suppressed because it is too large
Load Diff
698
fpga/verilog/jls_context_quantizer.sv
Normal file
698
fpga/verilog/jls_context_quantizer.sv
Normal file
@@ -0,0 +1,698 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.3 context determination, Annex G.1 variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Quantize D1/D2/D3 into Q1/Q2/Q3 and compute context ID
|
||||
// Trace : docs/jls_traceability.md#context-update
|
||||
// Example : D1=22,D2=8,D3=2 with T1=3,T2=7,T3=21,NEAR=0 gives Q=4,3,1.
|
||||
//
|
||||
// Context quantization stage. It does not update A/B/C/N; it only computes the
|
||||
// regular-mode context selector from reconstructed neighbors and forwards the
|
||||
// predictor event to the later context memory/update pipeline.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_gradient_quantize_one #(
  // Width of the signed gradient. PIX_WIDTH=16 needs 17 bits.
  parameter int DI_WIDTH = 17
) (
  // Local gradient Di as defined by JPEG-LS.
  input var logic signed [DI_WIDTH-1:0] Di,

  // JPEG-LS quantization thresholds T1 <= T2 <= T3 for the current NEAR.
  input var logic [15:0] T1,
  input var logic [15:0] T2,
  input var logic [15:0] T3,

  // JPEG-LS NEAR parameter of the current strip frame.
  input var logic [5:0] NEAR,

  // Quantized gradient Qi, one of -4..4.
  output logic signed [3:0] Qi
);

  // Number of zero bits placed above the 6-bit NEAR to reach DI_WIDTH.
  localparam int NEAR_ZERO_FILL = DI_WIDTH - 6;

  // Thresholds and NEAR as non-negative signed values of gradient width. The
  // low DI_WIDTH-1 bits of each threshold are kept and a zero sign bit is
  // prepended.
  logic signed [DI_WIDTH-1:0] pos_t1;
  logic signed [DI_WIDTH-1:0] pos_t2;
  logic signed [DI_WIDTH-1:0] pos_t3;
  logic signed [DI_WIDTH-1:0] pos_near;

  // Negated counterparts used for the negative half of the ladder.
  logic signed [DI_WIDTH-1:0] neg_t1;
  logic signed [DI_WIDTH-1:0] neg_t2;
  logic signed [DI_WIDTH-1:0] neg_t3;
  logic signed [DI_WIDTH-1:0] neg_near;

  assign pos_t1   = $signed({1'b0, T1[DI_WIDTH-2:0]});
  assign pos_t2   = $signed({1'b0, T2[DI_WIDTH-2:0]});
  assign pos_t3   = $signed({1'b0, T3[DI_WIDTH-2:0]});
  assign pos_near = $signed({{NEAR_ZERO_FILL{1'b0}}, NEAR});

  assign neg_t1   = -pos_t1;
  assign neg_t2   = -pos_t2;
  assign neg_t3   = -pos_t3;
  assign neg_near = -pos_near;

  // Threshold ladder, evaluated from the most negative region upwards. The
  // first region that contains Di selects Qi.
  always_comb begin
    if (Di <= neg_t3) begin
      Qi = -4'sd4;
    end else if (Di <= neg_t2) begin
      Qi = -4'sd3;
    end else if (Di <= neg_t1) begin
      Qi = -4'sd2;
    end else if (Di < neg_near) begin
      Qi = -4'sd1;
    end else if (Di <= pos_near) begin
      Qi = 4'sd0;
    end else if (Di < pos_t1) begin
      Qi = 4'sd1;
    end else if (Di < pos_t2) begin
      Qi = 4'sd2;
    end else if (Di < pos_t3) begin
      Qi = 4'sd3;
    end else begin
      Qi = 4'sd4;
    end
  end

endmodule
|
||||
|
||||
module jls_context_quantizer #(
  // Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous reset, active high.
  input var logic rst,

  // Incoming predicted pixel event and its ready.
  input var logic predict_valid,
  output logic predict_ready,

  // Original input sample X travelling with the event.
  input var logic [PIX_WIDTH-1:0] predict_sample,

  // Pixel coordinates travelling with the event.
  input var logic [12:0] predict_x,
  input var logic [12:0] predict_y,

  // Strip boundary flags travelling with the event.
  input var logic predict_strip_first_pixel,
  input var logic predict_strip_last_pixel,

  // Reconstructed neighbors.
  input var logic [PIX_WIDTH-1:0] Ra,
  input var logic [PIX_WIDTH-1:0] Rb,
  input var logic [PIX_WIDTH-1:0] Rc,
  input var logic [PIX_WIDTH-1:0] Rd,

  // MED prediction Px.
  input var logic [PIX_WIDTH-1:0] Px,

  // Thresholds and NEAR; captured together with each accepted event.
  input var logic [15:0] T1,
  input var logic [15:0] T2,
  input var logic [15:0] T3,
  input var logic [5:0] NEAR,

  // Outgoing quantized context event and its ready.
  output logic context_valid,
  input var logic context_ready,

  // Forwarded sample, coordinates and strip flags.
  output logic [PIX_WIDTH-1:0] context_sample,
  output logic [12:0] context_x,
  output logic [12:0] context_y,
  output logic context_strip_first_pixel,
  output logic context_strip_last_pixel,

  // Forwarded prediction and neighbors.
  output logic [PIX_WIDTH-1:0] context_Px,
  output logic [PIX_WIDTH-1:0] context_Ra,
  output logic [PIX_WIDTH-1:0] context_Rb,
  output logic [PIX_WIDTH-1:0] context_Rc,
  output logic [PIX_WIDTH-1:0] context_Rd,

  // Quantized gradients.
  output logic signed [3:0] Q1,
  output logic signed [3:0] Q2,
  output logic signed [3:0] Q3,

  // abs(Q1*81 + Q2*9 + Q3), i.e. 0..364.
  output logic [8:0] context_index,

  // High when Q1*81 + Q2*9 + Q3 is negative.
  output logic context_negative,

  // High when Q1 = Q2 = Q3 = 0.
  output logic run_mode_context
);

  // Gradient width: one bit more than a sample so the difference of two
  // samples fits as a signed value.
  localparam int DI_WIDTH = PIX_WIDTH + 1;

  // Pixel payload that is carried unchanged through every pipeline stage.
  typedef struct packed {
    logic [PIX_WIDTH-1:0] sample;
    logic [12:0]          x;
    logic [12:0]          y;
    logic                 strip_first_pixel;
    logic                 strip_last_pixel;
    logic [PIX_WIDTH-1:0] px;
    logic [PIX_WIDTH-1:0] ra;
    logic [PIX_WIDTH-1:0] rb;
    logic [PIX_WIDTH-1:0] rc;
    logic [PIX_WIDTH-1:0] rd;
  } pixel_event_t;

  // Quantization parameters captured alongside a pixel event.
  typedef struct packed {
    logic [15:0] t1;
    logic [15:0] t2;
    logic [15:0] t3;
    logic [5:0]  near;
  } quant_params_t;

  // Module inputs bundled into the payload types.
  pixel_event_t  predict_pix;
  quant_params_t predict_cfg;

  // Input register stage: feeds the gradient subtractors and comparators.
  logic          stage_valid;
  pixel_event_t  stage_pix;
  quant_params_t stage_cfg;

  // One-entry input skid slot. predict_ready is derived from this slot only.
  logic          stage_next_valid;
  pixel_event_t  stage_next_pix;
  quant_params_t stage_next_cfg;

  // Quantized-gradient register stage: feeds the context-index adders.
  logic              q_stage_valid;
  pixel_event_t      q_stage_pix;
  logic signed [3:0] q_stage_Q1;
  logic signed [3:0] q_stage_Q2;
  logic signed [3:0] q_stage_Q3;

  // One-entry output skid slot. q_stage drains into it when the output
  // register is occupied and not being accepted.
  logic              context_next_valid;
  pixel_event_t      context_next_pix;
  logic signed [3:0] context_next_Q1;
  logic signed [3:0] context_next_Q2;
  logic signed [3:0] context_next_Q3;
  logic        [8:0] context_next_index;
  logic              context_next_negative;
  logic              context_next_run_mode;

  // Output register for the pixel payload; the scalar outputs are registered
  // directly on their ports.
  pixel_event_t context_pix;

  // Local gradients of the event held in the input register stage.
  logic signed [DI_WIDTH-1:0] D1;
  logic signed [DI_WIDTH-1:0] D2;
  logic signed [DI_WIDTH-1:0] D3;

  // Comparator outputs for the event held in the input register stage.
  logic signed [3:0] q1_next;
  logic signed [3:0] q2_next;
  logic signed [3:0] q3_next;

  // Signed context value (Q1*9 + Q2)*9 + Q3 of the event held in q_stage.
  logic signed [9:0] q1_ext;
  logic signed [9:0] q2_ext;
  logic signed [9:0] q3_ext;
  logic signed [9:0] q1_times_81;
  logic signed [9:0] q2_times_9;
  logic signed [9:0] context_value_next;
  logic signed [9:0] context_abs_next;
  logic              context_negative_next;
  logic              run_mode_context_next;

  // Transfer strobes.
  logic context_accept;         // output register handed downstream
  logic q_stage_to_output;      // q_stage leaves towards output or skid
  logic context_direct_from_q;  // q_stage -> output register
  logic context_store_next;     // q_stage -> output skid slot
  logic context_promote_next;   // output skid slot -> output register
  logic q_stage_open;           // q_stage can be (re)loaded
  logic stage_to_q;             // input stage -> q_stage
  logic accept_predict;         // predictor event taken
  logic stage_load_predict;     // predictor event -> input stage
  logic stage_store_next;       // predictor event -> input skid slot
  logic stage_promote_next;     // input skid slot -> input stage

  // ---------------------------------------------------------------------
  // Payload bundling and output fan-out
  // ---------------------------------------------------------------------

  always_comb begin
    predict_pix.sample            = predict_sample;
    predict_pix.x                 = predict_x;
    predict_pix.y                 = predict_y;
    predict_pix.strip_first_pixel = predict_strip_first_pixel;
    predict_pix.strip_last_pixel  = predict_strip_last_pixel;
    predict_pix.px                = Px;
    predict_pix.ra                = Ra;
    predict_pix.rb                = Rb;
    predict_pix.rc                = Rc;
    predict_pix.rd                = Rd;

    predict_cfg.t1   = T1;
    predict_cfg.t2   = T2;
    predict_cfg.t3   = T3;
    predict_cfg.near = NEAR;
  end

  assign context_sample            = context_pix.sample;
  assign context_x                 = context_pix.x;
  assign context_y                 = context_pix.y;
  assign context_strip_first_pixel = context_pix.strip_first_pixel;
  assign context_strip_last_pixel  = context_pix.strip_last_pixel;
  assign context_Px                = context_pix.px;
  assign context_Ra                = context_pix.ra;
  assign context_Rb                = context_pix.rb;
  assign context_Rc                = context_pix.rc;
  assign context_Rd                = context_pix.rd;

  // ---------------------------------------------------------------------
  // Gradient computation and quantization (input register stage)
  // ---------------------------------------------------------------------

  // D1 = Rd - Rb, D2 = Rb - Rc, D3 = Rc - Ra, each on zero-extended samples.
  assign D1 = $signed({1'b0, stage_pix.rd}) - $signed({1'b0, stage_pix.rb});
  assign D2 = $signed({1'b0, stage_pix.rb}) - $signed({1'b0, stage_pix.rc});
  assign D3 = $signed({1'b0, stage_pix.rc}) - $signed({1'b0, stage_pix.ra});

  jls_gradient_quantize_one #(
    .DI_WIDTH(DI_WIDTH)
  ) q1_quantizer (
    .Di  (D1),
    .T1  (stage_cfg.t1),
    .T2  (stage_cfg.t2),
    .T3  (stage_cfg.t3),
    .NEAR(stage_cfg.near),
    .Qi  (q1_next)
  );

  jls_gradient_quantize_one #(
    .DI_WIDTH(DI_WIDTH)
  ) q2_quantizer (
    .Di  (D2),
    .T1  (stage_cfg.t1),
    .T2  (stage_cfg.t2),
    .T3  (stage_cfg.t3),
    .NEAR(stage_cfg.near),
    .Qi  (q2_next)
  );

  jls_gradient_quantize_one #(
    .DI_WIDTH(DI_WIDTH)
  ) q3_quantizer (
    .Di  (D3),
    .T1  (stage_cfg.t1),
    .T2  (stage_cfg.t2),
    .T3  (stage_cfg.t3),
    .NEAR(stage_cfg.near),
    .Qi  (q3_next)
  );

  // ---------------------------------------------------------------------
  // Context index computation (quantized-gradient register stage)
  // ---------------------------------------------------------------------

  always_comb begin
    // Sign-extend the 4-bit gradients to the 10-bit working width.
    q1_ext = {{6{q_stage_Q1[3]}}, q_stage_Q1};
    q2_ext = {{6{q_stage_Q2[3]}}, q_stage_Q2};
    q3_ext = {{6{q_stage_Q3[3]}}, q_stage_Q3};

    // Constant multiplies as shift-and-add: 81 = 64 + 16 + 1, 9 = 8 + 1.
    q1_times_81 = (q1_ext <<< 6) + (q1_ext <<< 4) + q1_ext;
    q2_times_9  = (q2_ext <<< 3) + q2_ext;

    context_value_next = q1_times_81 + q2_times_9 + q3_ext;

    context_negative_next = 1'b0;
    if (context_value_next < 10'sd0) begin
      context_negative_next = 1'b1;
    end

    context_abs_next = context_value_next;
    if (context_negative_next) begin
      context_abs_next = -context_value_next;
    end

    run_mode_context_next = 1'b0;
    if (q_stage_Q1 == 4'sd0 && q_stage_Q2 == 4'sd0 && q_stage_Q3 == 4'sd0) begin
      run_mode_context_next = 1'b1;
    end
  end

  // ---------------------------------------------------------------------
  // Transfer strobes, derived from the output side back towards the input
  // ---------------------------------------------------------------------

  always_comb begin
    context_accept = 1'b0;
    if (context_ready && context_valid) begin
      context_accept = 1'b1;
    end

    // q_stage may only drain while the output skid slot is empty.
    q_stage_to_output = 1'b0;
    if (!context_next_valid && q_stage_valid) begin
      q_stage_to_output = 1'b1;
    end

    context_direct_from_q = 1'b0;
    if (q_stage_to_output && (context_accept || !context_valid)) begin
      context_direct_from_q = 1'b1;
    end

    context_store_next = 1'b0;
    if (q_stage_to_output && !context_accept && context_valid) begin
      context_store_next = 1'b1;
    end

    context_promote_next = 1'b0;
    if (context_next_valid && (context_accept || !context_valid)) begin
      context_promote_next = 1'b1;
    end

    q_stage_open = 1'b0;
    if (q_stage_to_output || !q_stage_valid) begin
      q_stage_open = 1'b1;
    end

    stage_to_q = 1'b0;
    if (q_stage_open && stage_valid) begin
      stage_to_q = 1'b1;
    end

    // The predictor sees only the state of the input skid slot.
    predict_ready = 1'b1;
    if (stage_next_valid) begin
      predict_ready = 1'b0;
    end

    accept_predict = 1'b0;
    if (predict_ready && predict_valid) begin
      accept_predict = 1'b1;
    end

    stage_load_predict = 1'b0;
    if (accept_predict && (stage_to_q || !stage_valid)) begin
      stage_load_predict = 1'b1;
    end

    stage_store_next = 1'b0;
    if (accept_predict && !stage_to_q && stage_valid) begin
      stage_store_next = 1'b1;
    end

    stage_promote_next = 1'b0;
    if (stage_next_valid && stage_to_q) begin
      stage_promote_next = 1'b1;
    end
  end

  // ---------------------------------------------------------------------
  // Output register
  // ---------------------------------------------------------------------

  always_ff @(posedge clk) begin
    if (rst) begin
      context_valid    <= 1'b0;
      context_pix      <= '0;
      Q1               <= 4'sd0;
      Q2               <= 4'sd0;
      Q3               <= 4'sd0;
      context_index    <= 9'd0;
      context_negative <= 1'b0;
      run_mode_context <= 1'b0;
    end else begin
      // Handed downstream with no replacement available: becomes empty.
      if (context_accept && !context_promote_next && !context_direct_from_q) begin
        context_valid <= 1'b0;
      end

      // Refill from the output skid slot.
      if (context_promote_next) begin
        context_valid    <= 1'b1;
        context_pix      <= context_next_pix;
        Q1               <= context_next_Q1;
        Q2               <= context_next_Q2;
        Q3               <= context_next_Q3;
        context_index    <= context_next_index;
        context_negative <= context_next_negative;
        run_mode_context <= context_next_run_mode;
      end

      // Refill straight from q_stage.
      if (context_direct_from_q) begin
        context_valid    <= 1'b1;
        context_pix      <= q_stage_pix;
        Q1               <= q_stage_Q1;
        Q2               <= q_stage_Q2;
        Q3               <= q_stage_Q3;
        context_index    <= context_abs_next[8:0];
        context_negative <= context_negative_next;
        run_mode_context <= run_mode_context_next;
      end
    end
  end

  // ---------------------------------------------------------------------
  // Output skid slot
  // ---------------------------------------------------------------------

  always_ff @(posedge clk) begin
    if (rst) begin
      context_next_valid    <= 1'b0;
      context_next_pix      <= '0;
      context_next_Q1       <= 4'sd0;
      context_next_Q2       <= 4'sd0;
      context_next_Q3       <= 4'sd0;
      context_next_index    <= 9'd0;
      context_next_negative <= 1'b0;
      context_next_run_mode <= 1'b0;
    end else begin
      if (context_promote_next) begin
        context_next_valid <= 1'b0;
      end

      if (context_store_next) begin
        context_next_valid    <= 1'b1;
        context_next_pix      <= q_stage_pix;
        context_next_Q1       <= q_stage_Q1;
        context_next_Q2       <= q_stage_Q2;
        context_next_Q3       <= q_stage_Q3;
        context_next_index    <= context_abs_next[8:0];
        context_next_negative <= context_negative_next;
        context_next_run_mode <= run_mode_context_next;
      end
    end
  end

  // ---------------------------------------------------------------------
  // Quantized-gradient register stage
  // ---------------------------------------------------------------------

  always_ff @(posedge clk) begin
    if (rst) begin
      q_stage_valid <= 1'b0;
      q_stage_pix   <= '0;
      q_stage_Q1    <= 4'sd0;
      q_stage_Q2    <= 4'sd0;
      q_stage_Q3    <= 4'sd0;
    end else begin
      if (stage_to_q) begin
        q_stage_valid <= 1'b1;
        q_stage_pix   <= stage_pix;
        q_stage_Q1    <= q1_next;
        q_stage_Q2    <= q2_next;
        q_stage_Q3    <= q3_next;
      end else if (q_stage_to_output) begin
        q_stage_valid <= 1'b0;
      end
    end
  end

  // ---------------------------------------------------------------------
  // Input register stage
  // ---------------------------------------------------------------------

  always_ff @(posedge clk) begin
    if (rst) begin
      stage_valid <= 1'b0;
      stage_pix   <= '0;
      stage_cfg   <= '0;
    end else begin
      if (stage_promote_next) begin
        stage_valid <= 1'b1;
        stage_pix   <= stage_next_pix;
        stage_cfg   <= stage_next_cfg;
      end else if (stage_to_q) begin
        stage_valid <= 1'b0;
      end

      // Listed last so a newly accepted event wins over the clear above.
      if (stage_load_predict) begin
        stage_valid <= 1'b1;
        stage_pix   <= predict_pix;
        stage_cfg   <= predict_cfg;
      end
    end
  end

  // ---------------------------------------------------------------------
  // Input skid slot
  // ---------------------------------------------------------------------

  always_ff @(posedge clk) begin
    if (rst) begin
      stage_next_valid <= 1'b0;
      stage_next_pix   <= '0;
      stage_next_cfg   <= '0;
    end else begin
      if (stage_promote_next) begin
        stage_next_valid <= 1'b0;
      end

      if (stage_store_next) begin
        stage_next_valid <= 1'b1;
        stage_next_pix   <= predict_pix;
        stage_next_cfg   <= predict_cfg;
      end
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
960
fpga/verilog/jls_context_update.sv
Normal file
960
fpga/verilog/jls_context_update.sv
Normal file
@@ -0,0 +1,960 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 Golomb parameter, Annex A.6 variables update
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Compute k and update regular-mode A/B/C/N
|
||||
// Trace : docs/jls_traceability.md#context-update
|
||||
// Example : A=4,N=1,Errval=3 gives k=2 before A is updated to 7.
|
||||
//
|
||||
// Pipelined single-context update arithmetic. Stage 0 captures the Annex A.6
// input context. Stage 1 registers the odd-scale multiplier operands for
// Errval*(2*NEAR+1) and carries the Annex A.5 Golomb k decision. Stage 2
// captures the scaled product. Stage 3 accumulates B[Q]. A following pre-bias
// stage registers the RESET folding of A[Q]/B[Q]/N[Q], and a bias stage
// registers the B[Q]/C[Q] bias correction before the result register. The
// split keeps NEAR-driven arithmetic short at the 250 MHz target.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_context_update (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous active-high reset.
  input var logic rst,

  // Input update event is valid.
  input var logic update_valid,

  // High while the local input skid slot is empty.
  output logic update_ready,

  // Standard context variables before update.
  input var logic [31:0] A_in,
  input var logic signed [31:0] B_in,
  input var logic signed [8:0] C_in,
  input var logic [15:0] N_in,

  // Quantized prediction error Errval for this context.
  input var logic signed [31:0] Errval,

  // Context and strip metadata forwarded with Errval.
  input var logic [8:0] context_index_in,
  input var logic strip_last_pixel_in,

  // Coding parameters forwarded for the Golomb encoder.
  input var logic [4:0] qbpp_in,
  input var logic [6:0] LIMIT_in,

  // JPEG-LS NEAR parameter for this strip.
  input var logic [5:0] NEAR,

  // JPEG-LS RESET parameter, normally 64.
  input var logic [15:0] RESET,

  // Output updated context event is valid.
  output logic result_valid,

  // Downstream context table accepted this result.
  input var logic result_ready,

  // Golomb parameter computed from A_in and N_in before the update.
  output logic [4:0] k,

  // Forwarded Errval for the downstream error mapper.
  output logic signed [31:0] Errval_out,

  // Forwarded context and strip metadata.
  output logic [8:0] context_index_out,
  output logic strip_last_pixel_out,

  // Forwarded coding parameters.
  output logic [4:0] qbpp_out,
  output logic [6:0] LIMIT_out,

  // High when k == 0, NEAR == 0 and 2*B_in + N_in - 1 < 0, i.e. the mapper
  // must invert Errval before mapping.
  output logic map_invert,

  // Standard context variables after update.
  output logic [31:0] A_out,
  output logic signed [31:0] B_out,
  output logic signed [8:0] C_out,
  output logic [15:0] N_out
);

  // Register pipeline implemented below, in data-flow order:
  //   update_next_* : one-entry input skid slot
  //   s0_*          : captured raw inputs
  //   s1_*          : A[Q]+|Errval|, k, map_invert and multiplier operands
  //   s2_*          : registered Errval*(2*NEAR+1) product
  //   stage_*       : registered B[Q] accumulation
  //   prebias_*     : registered RESET folding of A/B/N
  //   bias_*        : registered B[Q]/C[Q] bias correction
  //   result_next_* : one-entry output skid slot
  //   public outputs: result register

  // ---------------------------------------------------------------------
  // Combinational terms derived from the stage-0 registers.
  // ---------------------------------------------------------------------
  logic signed [32:0] Errval_ext;              // sign-extended s0_Errval
  logic        [32:0] abs_Errval_ext;          // |Errval_ext|
  logic signed [7:0]  near_scale;              // 2*NEAR+1
  logic        [31:0] A_accum_next;            // A[Q] + |Errval|
  logic        [15:0] N_halved_plus_one_next;  // (N[Q] >> 1) + 1
  logic        [4:0]  k_next;                  // Annex A.5 Golomb parameter
  logic               k_or_near_is_zero;       // k == 0 and NEAR == 0
  logic signed [32:0] map_bias_check;          // 2*B[Q] + N[Q] - 1
  logic               map_invert_next;

  // Product of the stage-1 operands and the stage-2 B[Q] accumulation.
  logic signed [40:0] B_delta;
  logic signed [40:0] B_accum_next;

  // Stage-0 registered input payload (raw context-table values and Errval).
  logic               s0_valid;
  logic        [31:0] s0_A_in;
  logic signed [31:0] s0_B_in;
  logic signed [8:0]  s0_C_in;
  logic        [15:0] s0_N_in;
  logic signed [31:0] s0_Errval;
  logic        [8:0]  s0_context_index;
  logic               s0_strip_last_pixel;
  logic        [4:0]  s0_qbpp;
  logic        [6:0]  s0_LIMIT;
  logic        [5:0]  s0_NEAR;
  logic        [15:0] s0_RESET;

  // One-entry input skid slot. update_ready is derived only from this slot's
  // valid bit, not from result_ready.
  logic               update_next_valid;
  logic        [31:0] update_next_A_in;
  logic signed [31:0] update_next_B_in;
  logic signed [8:0]  update_next_C_in;
  logic        [15:0] update_next_N_in;
  logic signed [31:0] update_next_Errval;
  logic        [8:0]  update_next_context_index;
  logic               update_next_strip_last_pixel;
  logic        [4:0]  update_next_qbpp;
  logic        [6:0]  update_next_LIMIT;
  logic        [5:0]  update_next_NEAR;
  logic        [15:0] update_next_RESET;

  // Stage-1 payload. s1_Errval_ext and s1_near_scale are the registered
  // multiplier operands for the Annex A.6 Errval*(2*NEAR+1) term.
  logic               s1_valid;
  logic        [31:0] s1_A_accum;
  logic signed [31:0] s1_B_in;
  logic signed [8:0]  s1_C_in;
  logic        [15:0] s1_N_in;
  logic        [15:0] s1_N_halved_plus_one;
  logic        [15:0] s1_RESET;
  logic        [4:0]  s1_k;
  logic signed [31:0] s1_Errval;
  logic        [8:0]  s1_context_index;
  logic               s1_strip_last_pixel;
  logic        [4:0]  s1_qbpp;
  logic        [6:0]  s1_LIMIT;
  logic               s1_map_invert;
  logic signed [32:0] s1_Errval_ext;
  logic signed [7:0]  s1_near_scale;

  // Stage-2 payload. s2_B_delta is the registered scaled Errval term that is
  // added to B[Q] on the way into stage 3.
  logic               s2_valid;
  logic        [31:0] s2_A_accum;
  logic signed [31:0] s2_B_in;
  logic signed [40:0] s2_B_delta;
  logic signed [8:0]  s2_C_in;
  logic        [15:0] s2_N_in;
  logic        [15:0] s2_N_halved_plus_one;
  logic        [15:0] s2_RESET;
  logic        [4:0]  s2_k;
  logic signed [31:0] s2_Errval;
  logic        [8:0]  s2_context_index;
  logic               s2_strip_last_pixel;
  logic        [4:0]  s2_qbpp;
  logic        [6:0]  s2_LIMIT;
  logic               s2_map_invert;

  // Stage-3 payload: accumulated A[Q]/B[Q] before RESET folding.
  logic               stage_valid;
  logic        [31:0] stage_A_accum;
  logic signed [40:0] stage_B_accum;
  logic signed [8:0]  stage_C_in;
  logic        [15:0] stage_N_in;
  logic        [15:0] stage_N_halved_plus_one;
  logic        [15:0] stage_RESET;
  logic        [4:0]  stage_k;
  logic signed [31:0] stage_Errval;
  logic        [8:0]  stage_context_index;
  logic               stage_strip_last_pixel;
  logic        [4:0]  stage_qbpp;
  logic        [6:0]  stage_LIMIT;
  logic               stage_map_invert;

  // Pre-bias payload: Annex A.6 variables after RESET folding but before the
  // B[Q]/C[Q] bias correction.
  logic               prebias_valid;
  logic        [31:0] prebias_A_after_reset;
  logic signed [40:0] prebias_B_after_reset;
  logic signed [8:0]  prebias_C_in;
  logic        [15:0] prebias_N_after_increment;
  logic        [4:0]  prebias_k;
  logic signed [31:0] prebias_Errval;
  logic        [8:0]  prebias_context_index;
  logic               prebias_strip_last_pixel;
  logic        [4:0]  prebias_qbpp;
  logic        [6:0]  prebias_LIMIT;
  logic               prebias_map_invert;

  // Bias payload: corrected B[Q]/C[Q], registered so the result register
  // loads from flops rather than from the compare/add/saturate logic.
  logic               bias_valid;
  logic        [31:0] bias_A_after_reset;
  logic signed [31:0] bias_B_after_bias;
  logic signed [8:0]  bias_C_after_bias;
  logic        [15:0] bias_N_after_increment;
  logic        [4:0]  bias_k;
  logic signed [31:0] bias_Errval;
  logic        [8:0]  bias_context_index;
  logic               bias_strip_last_pixel;
  logic        [4:0]  bias_qbpp;
  logic        [6:0]  bias_LIMIT;
  logic               bias_map_invert;

  // One-entry output skid slot. The bias stage retires into this slot when
  // the public result register is still occupied.
  logic               result_next_valid;
  logic        [4:0]  result_next_k;
  logic signed [31:0] result_next_Errval_out;
  logic        [8:0]  result_next_context_index_out;
  logic               result_next_strip_last_pixel_out;
  logic        [4:0]  result_next_qbpp_out;
  logic        [6:0]  result_next_LIMIT_out;
  logic               result_next_map_invert;
  logic        [31:0] result_next_A_out;
  logic signed [31:0] result_next_B_out;
  logic signed [8:0]  result_next_C_out;
  logic        [15:0] result_next_N_out;

  // RESET-folding terms (computed from stage_*) and bias-correction terms
  // (computed from prebias_*).
  logic        [31:0] A_after_reset;
  logic signed [40:0] B_after_reset;
  logic        [15:0] N_after_increment;
  logic signed [40:0] bias_stage_N_signed_ext;
  logic signed [40:0] bias_stage_negative_N_plus_one;
  logic signed [40:0] bias_stage_B_plus_N;
  logic signed [40:0] bias_stage_B_minus_N;
  logic signed [40:0] bias_stage_B_after_bias;
  logic signed [8:0]  bias_stage_C_after_bias;

  // Handshake terms. "<a>_to_<b>" is a transfer this cycle; "<x>_open" means
  // stage <x> is empty or is draining this cycle.
  logic result_slot_open;
  logic bias_to_result;
  logic bias_open;
  logic prebias_to_bias;
  logic prebias_open;
  logic stage_to_prebias;
  logic stage_open;
  logic s2_to_stage;
  logic s2_open;
  logic s1_to_s2;
  logic s1_open;
  logic s0_to_s1;
  logic s0_open;
  logic accept_update;
  logic update_load_input;
  logic update_store_next;
  logic s0_promote_next;
  logic result_accept;
  logic result_direct_from_bias;
  logic result_store_next;
  logic result_promote_next;

  // Shared narrow-scale multiplier for Annex A.6 Errval*(2*NEAR+1).
  // NOTE(review): only bits [5:0] of the registered 2*NEAR+1 value are
  // connected, so NEAR[5] never reaches the multiplier. Presumably NEAR is
  // limited to 31 upstream - confirm against jls_near_scale_mul.
  jls_near_scale_mul #(
    .INPUT_WIDTH(33),
    .OUTPUT_WIDTH(41)
  ) context_update_near_scale_mul_i (
    .multiplicand_i(s1_Errval_ext),
    .near_scale_i(s1_near_scale[5:0]),
    .product_o(B_delta)
  );

  // ---------------------------------------------------------------------
  // Stage-0 arithmetic: sign extension, |Errval|, 2*NEAR+1, A[Q] update and
  // the pre-computed halved N[Q] used later by RESET folding.
  // ---------------------------------------------------------------------
  always_comb begin : stage0_arith
    Errval_ext = {s0_Errval[31], s0_Errval};

    if (Errval_ext < 33'sd0) begin
      abs_Errval_ext = -Errval_ext;
    end else begin
      abs_Errval_ext = Errval_ext[32:0];
    end

    near_scale             = $signed({1'b0, s0_NEAR, 1'b1});
    A_accum_next           = s0_A_in + abs_Errval_ext[31:0];
    N_halved_plus_one_next = (s0_N_in >> 1) + 16'd1;
  end

  // Stage-2 arithmetic: B[Q] + Errval*(2*NEAR+1) from the registered product.
  always_comb begin : stage2_accumulate
    B_accum_next = {{9{s2_B_in[31]}}, s2_B_in} + s2_B_delta;
  end

  // Standard   : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
  // Clause     : Annex A.5 Golomb parameter
  // Pseudocode : for (k = 0; (N[Q] << k) < A[Q]; k++);
  // k is the smallest shift in 0..15 with (N << shift) >= A, saturating at
  // 16 when none qualifies. The loop runs from 15 down to 0 so that the
  // smallest qualifying shift is assigned last and therefore wins.
  always_comb begin : golomb_k_select
    k_next = 5'd16;
    for (int shift = 15; shift >= 0; shift--) begin
      if (({16'd0, s0_N_in} << shift) >= s0_A_in) begin
        k_next = shift[4:0];
      end
    end
  end

  // Mapping inversion request: k == 0, NEAR == 0 and 2*B[Q] + N[Q] - 1 < 0
  // (equivalently 2*B[Q] <= -N[Q]).
  always_comb begin : map_invert_select
    if (k_next == 5'd0 && s0_NEAR == 6'd0) begin
      k_or_near_is_zero = 1'b1;
    end else begin
      k_or_near_is_zero = 1'b0;
    end

    map_bias_check = {s0_B_in[31], s0_B_in} + {s0_B_in[31], s0_B_in} + $signed({17'd0, s0_N_in}) - 33'sd1;

    if (k_or_near_is_zero && map_bias_check < 33'sd0) begin
      map_invert_next = 1'b1;
    end else begin
      map_invert_next = 1'b0;
    end
  end

  // Standard   : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
  // Clause     : Annex A.6 variables update
  // Pseudocode : If N[Q] == RESET then halve A[Q], B[Q], and N[Q]; N[Q]++.
  // B[Q] uses an arithmetic shift so negative values round toward -infinity.
  always_comb begin : reset_fold
    if (stage_N_in == stage_RESET) begin
      A_after_reset     = stage_A_accum >> 1;
      B_after_reset     = stage_B_accum >>> 1;
      N_after_increment = stage_N_halved_plus_one;
    end else begin
      A_after_reset     = stage_A_accum;
      B_after_reset     = stage_B_accum;
      N_after_increment = stage_N_in + 16'd1;
    end
  end

  // Bias-correction operands, all derived from the registered pre-bias state.
  always_comb begin : bias_operands
    bias_stage_N_signed_ext        = $signed({25'd0, prebias_N_after_increment});
    bias_stage_negative_N_plus_one = -bias_stage_N_signed_ext + 41'sd1;
    bias_stage_B_plus_N            = prebias_B_after_reset + bias_stage_N_signed_ext;
    bias_stage_B_minus_N           = prebias_B_after_reset - bias_stage_N_signed_ext;
  end

  // Standard   : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
  // Clause     : Annex A.6 variables update
  // Pseudocode : Bias correction for B[Q] and C[Q].
  // Computed from the registered pre-bias state so RESET folding and bias
  // correction are separated by a register.
  always_comb begin : bias_correct
    bias_stage_B_after_bias = prebias_B_after_reset;
    bias_stage_C_after_bias = prebias_C_in;

    if (bias_stage_B_plus_N <= 41'sd0) begin
      // B[Q] <= -N[Q]: add N[Q], clamp to -N[Q]+1, decrement C[Q] down to -128.
      if (bias_stage_B_plus_N <= -bias_stage_N_signed_ext) begin
        bias_stage_B_after_bias = bias_stage_negative_N_plus_one;
      end else begin
        bias_stage_B_after_bias = bias_stage_B_plus_N;
      end
      if (prebias_C_in > -9'sd128) begin
        bias_stage_C_after_bias = prebias_C_in - 9'sd1;
      end
    end else if (prebias_B_after_reset > 41'sd0) begin
      // B[Q] > 0: subtract N[Q], clamp to 0, increment C[Q] up to 127.
      if (bias_stage_B_minus_N > 41'sd0) begin
        bias_stage_B_after_bias = 41'sd0;
      end else begin
        bias_stage_B_after_bias = bias_stage_B_minus_N;
      end
      if (prebias_C_in < 9'sd127) begin
        bias_stage_C_after_bias = prebias_C_in + 9'sd1;
      end
    end
  end

  // ---------------------------------------------------------------------
  // Handshake: output side. The bias stage retires whenever the output skid
  // slot is free; it does not look at result_ready.
  // ---------------------------------------------------------------------
  always_comb begin : handshake_output
    if (result_valid && result_ready) begin
      result_accept = 1'b1;
    end else begin
      result_accept = 1'b0;
    end

    result_slot_open = !result_next_valid;

    if (bias_valid && result_slot_open) begin
      bias_to_result = 1'b1;
    end else begin
      bias_to_result = 1'b0;
    end

    // A retiring bias entry goes straight to the result register when that
    // register is empty, otherwise into the skid slot.
    if (bias_to_result && !result_valid) begin
      result_direct_from_bias = 1'b1;
    end else begin
      result_direct_from_bias = 1'b0;
    end

    if (bias_to_result && result_valid) begin
      result_store_next = 1'b1;
    end else begin
      result_store_next = 1'b0;
    end

    if (result_next_valid && !result_valid) begin
      result_promote_next = 1'b1;
    end else begin
      result_promote_next = 1'b0;
    end
  end

  // Handshake: internal chain, evaluated from the bias stage back to stage 0.
  always_comb begin : handshake_chain
    if (!bias_valid || bias_to_result) begin
      bias_open = 1'b1;
    end else begin
      bias_open = 1'b0;
    end

    if (prebias_valid && bias_open) begin
      prebias_to_bias = 1'b1;
    end else begin
      prebias_to_bias = 1'b0;
    end

    if (!prebias_valid || prebias_to_bias) begin
      prebias_open = 1'b1;
    end else begin
      prebias_open = 1'b0;
    end

    if (stage_valid && prebias_open) begin
      stage_to_prebias = 1'b1;
    end else begin
      stage_to_prebias = 1'b0;
    end

    if (!stage_valid || stage_to_prebias) begin
      stage_open = 1'b1;
    end else begin
      stage_open = 1'b0;
    end

    if (s2_valid && stage_open) begin
      s2_to_stage = 1'b1;
    end else begin
      s2_to_stage = 1'b0;
    end

    if (!s2_valid || s2_to_stage) begin
      s2_open = 1'b1;
    end else begin
      s2_open = 1'b0;
    end

    if (s1_valid && s2_open) begin
      s1_to_s2 = 1'b1;
    end else begin
      s1_to_s2 = 1'b0;
    end

    if (!s1_valid || s1_to_s2) begin
      s1_open = 1'b1;
    end else begin
      s1_open = 1'b0;
    end

    if (s0_valid && s1_open) begin
      s0_to_s1 = 1'b1;
    end else begin
      s0_to_s1 = 1'b0;
    end

    if (!s0_valid || s0_to_s1) begin
      s0_open = 1'b1;
    end else begin
      s0_open = 1'b0;
    end
  end

  // Handshake: input side. An accepted event loads stage 0 directly when
  // stage 0 is empty or draining, otherwise it parks in the input skid slot.
  always_comb begin : handshake_input
    if (!update_next_valid) begin
      update_ready = 1'b1;
    end else begin
      update_ready = 1'b0;
    end

    if (update_valid && update_ready) begin
      accept_update = 1'b1;
    end else begin
      accept_update = 1'b0;
    end

    if (accept_update && (!s0_valid || s0_to_s1)) begin
      update_load_input = 1'b1;
    end else begin
      update_load_input = 1'b0;
    end

    if (accept_update && s0_valid && !s0_to_s1) begin
      update_store_next = 1'b1;
    end else begin
      update_store_next = 1'b0;
    end

    if (s0_to_s1 && update_next_valid) begin
      s0_promote_next = 1'b1;
    end else begin
      s0_promote_next = 1'b0;
    end
  end

  // ---------------------------------------------------------------------
  // Result register and output skid slot.
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin : result_regs
    if (rst) begin
      result_valid                     <= 1'b0;
      k                                <= 5'd0;
      Errval_out                       <= 32'sd0;
      context_index_out                <= 9'd0;
      strip_last_pixel_out             <= 1'b0;
      qbpp_out                         <= 5'd0;
      LIMIT_out                        <= 7'd0;
      map_invert                       <= 1'b0;
      A_out                            <= 32'd0;
      B_out                            <= 32'sd0;
      C_out                            <= 9'sd0;
      N_out                            <= 16'd0;
      result_next_valid                <= 1'b0;
      result_next_k                    <= 5'd0;
      result_next_Errval_out           <= 32'sd0;
      result_next_context_index_out    <= 9'd0;
      result_next_strip_last_pixel_out <= 1'b0;
      result_next_qbpp_out             <= 5'd0;
      result_next_LIMIT_out            <= 7'd0;
      result_next_map_invert           <= 1'b0;
      result_next_A_out                <= 32'd0;
      result_next_B_out                <= 32'sd0;
      result_next_C_out                <= 9'sd0;
      result_next_N_out                <= 16'd0;
    end else begin
      // Downstream took the result and nothing refills the register.
      if (result_accept && !result_promote_next && !result_direct_from_bias) begin
        result_valid <= 1'b0;
      end

      // Skid slot moves into the empty result register.
      if (result_promote_next) begin
        result_valid         <= 1'b1;
        k                    <= result_next_k;
        Errval_out           <= result_next_Errval_out;
        context_index_out    <= result_next_context_index_out;
        strip_last_pixel_out <= result_next_strip_last_pixel_out;
        qbpp_out             <= result_next_qbpp_out;
        LIMIT_out            <= result_next_LIMIT_out;
        map_invert           <= result_next_map_invert;
        A_out                <= result_next_A_out;
        B_out                <= result_next_B_out;
        C_out                <= result_next_C_out;
        N_out                <= result_next_N_out;
        result_next_valid    <= 1'b0;
      end

      // Bias stage retires straight into the empty result register.
      if (result_direct_from_bias) begin
        result_valid         <= 1'b1;
        k                    <= bias_k;
        Errval_out           <= bias_Errval;
        context_index_out    <= bias_context_index;
        strip_last_pixel_out <= bias_strip_last_pixel;
        qbpp_out             <= bias_qbpp;
        LIMIT_out            <= bias_LIMIT;
        map_invert           <= bias_map_invert;
        A_out                <= bias_A_after_reset;
        B_out                <= bias_B_after_bias;
        C_out                <= bias_C_after_bias;
        N_out                <= bias_N_after_increment;
      end

      // Bias stage retires into the skid slot behind an occupied register.
      if (result_store_next) begin
        result_next_valid                <= 1'b1;
        result_next_k                    <= bias_k;
        result_next_Errval_out           <= bias_Errval;
        result_next_context_index_out    <= bias_context_index;
        result_next_strip_last_pixel_out <= bias_strip_last_pixel;
        result_next_qbpp_out             <= bias_qbpp;
        result_next_LIMIT_out            <= bias_LIMIT;
        result_next_map_invert           <= bias_map_invert;
        result_next_A_out                <= bias_A_after_reset;
        result_next_B_out                <= bias_B_after_bias;
        result_next_C_out                <= bias_C_after_bias;
        result_next_N_out                <= bias_N_after_increment;
      end
    end
  end

  // ---------------------------------------------------------------------
  // Bias stage registers (bias-corrected B[Q]/C[Q]).
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin : bias_regs
    if (rst) begin
      bias_valid             <= 1'b0;
      bias_A_after_reset     <= 32'd0;
      bias_B_after_bias      <= 32'sd0;
      bias_C_after_bias      <= 9'sd0;
      bias_N_after_increment <= 16'd0;
      bias_k                 <= 5'd0;
      bias_Errval            <= 32'sd0;
      bias_context_index     <= 9'd0;
      bias_strip_last_pixel  <= 1'b0;
      bias_qbpp              <= 5'd0;
      bias_LIMIT             <= 7'd0;
      bias_map_invert        <= 1'b0;
    end else if (prebias_to_bias) begin
      bias_valid             <= 1'b1;
      bias_A_after_reset     <= prebias_A_after_reset;
      // Only the low 32 bits of the 41-bit corrected B[Q] are kept.
      bias_B_after_bias      <= bias_stage_B_after_bias[31:0];
      bias_C_after_bias      <= bias_stage_C_after_bias;
      bias_N_after_increment <= prebias_N_after_increment;
      bias_k                 <= prebias_k;
      bias_Errval            <= prebias_Errval;
      bias_context_index     <= prebias_context_index;
      bias_strip_last_pixel  <= prebias_strip_last_pixel;
      bias_qbpp              <= prebias_qbpp;
      bias_LIMIT             <= prebias_LIMIT;
      bias_map_invert        <= prebias_map_invert;
    end else if (bias_to_result) begin
      bias_valid <= 1'b0;
    end
  end

  // ---------------------------------------------------------------------
  // Pre-bias stage registers (after RESET folding).
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin : prebias_regs
    if (rst) begin
      prebias_valid             <= 1'b0;
      prebias_A_after_reset     <= 32'd0;
      prebias_B_after_reset     <= 41'sd0;
      prebias_C_in              <= 9'sd0;
      prebias_N_after_increment <= 16'd0;
      prebias_k                 <= 5'd0;
      prebias_Errval            <= 32'sd0;
      prebias_context_index     <= 9'd0;
      prebias_strip_last_pixel  <= 1'b0;
      prebias_qbpp              <= 5'd0;
      prebias_LIMIT             <= 7'd0;
      prebias_map_invert        <= 1'b0;
    end else if (stage_to_prebias) begin
      prebias_valid             <= 1'b1;
      prebias_A_after_reset     <= A_after_reset;
      prebias_B_after_reset     <= B_after_reset;
      prebias_C_in              <= stage_C_in;
      prebias_N_after_increment <= N_after_increment;
      prebias_k                 <= stage_k;
      prebias_Errval            <= stage_Errval;
      prebias_context_index     <= stage_context_index;
      prebias_strip_last_pixel  <= stage_strip_last_pixel;
      prebias_qbpp              <= stage_qbpp;
      prebias_LIMIT             <= stage_LIMIT;
      prebias_map_invert        <= stage_map_invert;
    end else if (prebias_to_bias) begin
      prebias_valid <= 1'b0;
    end
  end

  // ---------------------------------------------------------------------
  // Stage-3 registers (accumulated B[Q]).
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin : stage3_regs
    if (rst) begin
      stage_valid             <= 1'b0;
      stage_A_accum           <= 32'd0;
      stage_B_accum           <= 41'sd0;
      stage_C_in              <= 9'sd0;
      stage_N_in              <= 16'd0;
      stage_N_halved_plus_one <= 16'd0;
      stage_RESET             <= 16'd0;
      stage_k                 <= 5'd0;
      stage_Errval            <= 32'sd0;
      stage_context_index     <= 9'd0;
      stage_strip_last_pixel  <= 1'b0;
      stage_qbpp              <= 5'd0;
      stage_LIMIT             <= 7'd0;
      stage_map_invert        <= 1'b0;
    end else if (s2_to_stage) begin
      stage_valid             <= 1'b1;
      stage_A_accum           <= s2_A_accum;
      stage_B_accum           <= B_accum_next;
      stage_C_in              <= s2_C_in;
      stage_N_in              <= s2_N_in;
      stage_N_halved_plus_one <= s2_N_halved_plus_one;
      stage_RESET             <= s2_RESET;
      stage_k                 <= s2_k;
      stage_Errval            <= s2_Errval;
      stage_context_index     <= s2_context_index;
      stage_strip_last_pixel  <= s2_strip_last_pixel;
      stage_qbpp              <= s2_qbpp;
      stage_LIMIT             <= s2_LIMIT;
      stage_map_invert        <= s2_map_invert;
    end else if (stage_to_prebias) begin
      stage_valid <= 1'b0;
    end
  end

  // ---------------------------------------------------------------------
  // Stage-2 registers (registered multiplier product).
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin : stage2_regs
    if (rst) begin
      s2_valid             <= 1'b0;
      s2_A_accum           <= 32'd0;
      s2_B_in              <= 32'sd0;
      s2_B_delta           <= 41'sd0;
      s2_C_in              <= 9'sd0;
      s2_N_in              <= 16'd0;
      s2_N_halved_plus_one <= 16'd0;
      s2_RESET             <= 16'd0;
      s2_k                 <= 5'd0;
      s2_Errval            <= 32'sd0;
      s2_context_index     <= 9'd0;
      s2_strip_last_pixel  <= 1'b0;
      s2_qbpp              <= 5'd0;
      s2_LIMIT             <= 7'd0;
      s2_map_invert        <= 1'b0;
    end else if (s1_to_s2) begin
      s2_valid             <= 1'b1;
      s2_A_accum           <= s1_A_accum;
      s2_B_in              <= s1_B_in;
      s2_B_delta           <= B_delta;
      s2_C_in              <= s1_C_in;
      s2_N_in              <= s1_N_in;
      s2_N_halved_plus_one <= s1_N_halved_plus_one;
      s2_RESET             <= s1_RESET;
      s2_k                 <= s1_k;
      s2_Errval            <= s1_Errval;
      s2_context_index     <= s1_context_index;
      s2_strip_last_pixel  <= s1_strip_last_pixel;
      s2_qbpp              <= s1_qbpp;
      s2_LIMIT             <= s1_LIMIT;
      s2_map_invert        <= s1_map_invert;
    end else if (s2_to_stage) begin
      s2_valid <= 1'b0;
    end
  end

  // ---------------------------------------------------------------------
  // Stage-1 registers (multiplier operands, k, map_invert, A[Q]+|Errval|).
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin : stage1_regs
    if (rst) begin
      s1_valid             <= 1'b0;
      s1_A_accum           <= 32'd0;
      s1_B_in              <= 32'sd0;
      s1_C_in              <= 9'sd0;
      s1_N_in              <= 16'd0;
      s1_N_halved_plus_one <= 16'd0;
      s1_RESET             <= 16'd0;
      s1_k                 <= 5'd0;
      s1_Errval            <= 32'sd0;
      s1_context_index     <= 9'd0;
      s1_strip_last_pixel  <= 1'b0;
      s1_qbpp              <= 5'd0;
      s1_LIMIT             <= 7'd0;
      s1_map_invert        <= 1'b0;
      s1_Errval_ext        <= 33'sd0;
      // Resets to the NEAR == 0 scale (2*0+1).
      s1_near_scale        <= 8'sd1;
    end else if (s0_to_s1) begin
      s1_valid             <= 1'b1;
      s1_A_accum           <= A_accum_next;
      s1_B_in              <= s0_B_in;
      s1_C_in              <= s0_C_in;
      s1_N_in              <= s0_N_in;
      s1_N_halved_plus_one <= N_halved_plus_one_next;
      s1_RESET             <= s0_RESET;
      s1_k                 <= k_next;
      s1_Errval            <= s0_Errval;
      s1_context_index     <= s0_context_index;
      s1_strip_last_pixel  <= s0_strip_last_pixel;
      s1_qbpp              <= s0_qbpp;
      s1_LIMIT             <= s0_LIMIT;
      s1_map_invert        <= map_invert_next;
      s1_Errval_ext        <= Errval_ext;
      s1_near_scale        <= near_scale;
    end else if (s1_to_s2) begin
      s1_valid <= 1'b0;
    end
  end

  // ---------------------------------------------------------------------
  // Stage-0 registers and input skid slot. Statement order matters: a direct
  // load from the ports is written after the drain so it overrides the
  // s0_valid clear issued in the same cycle.
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin : stage0_regs
    if (rst) begin
      s0_valid                     <= 1'b0;
      s0_A_in                      <= 32'd0;
      s0_B_in                      <= 32'sd0;
      s0_C_in                      <= 9'sd0;
      s0_N_in                      <= 16'd0;
      s0_Errval                    <= 32'sd0;
      s0_context_index             <= 9'd0;
      s0_strip_last_pixel          <= 1'b0;
      s0_qbpp                      <= 5'd0;
      s0_LIMIT                     <= 7'd0;
      s0_NEAR                      <= 6'd0;
      s0_RESET                     <= 16'd0;
      update_next_valid            <= 1'b0;
      update_next_A_in             <= 32'd0;
      update_next_B_in             <= 32'sd0;
      update_next_C_in             <= 9'sd0;
      update_next_N_in             <= 16'd0;
      update_next_Errval           <= 32'sd0;
      update_next_context_index    <= 9'd0;
      update_next_strip_last_pixel <= 1'b0;
      update_next_qbpp             <= 5'd0;
      update_next_LIMIT            <= 7'd0;
      update_next_NEAR             <= 6'd0;
      update_next_RESET            <= 16'd0;
    end else begin
      // Skid slot refills stage 0 as stage 0 drains; otherwise a plain drain
      // empties stage 0.
      if (s0_promote_next) begin
        s0_valid            <= 1'b1;
        s0_A_in             <= update_next_A_in;
        s0_B_in             <= update_next_B_in;
        s0_C_in             <= update_next_C_in;
        s0_N_in             <= update_next_N_in;
        s0_Errval           <= update_next_Errval;
        s0_context_index    <= update_next_context_index;
        s0_strip_last_pixel <= update_next_strip_last_pixel;
        s0_qbpp             <= update_next_qbpp;
        s0_LIMIT            <= update_next_LIMIT;
        s0_NEAR             <= update_next_NEAR;
        s0_RESET            <= update_next_RESET;
        update_next_valid   <= 1'b0;
      end else if (s0_to_s1) begin
        s0_valid <= 1'b0;
      end

      // Accepted event goes directly into stage 0.
      if (update_load_input) begin
        s0_valid            <= 1'b1;
        s0_A_in             <= A_in;
        s0_B_in             <= B_in;
        s0_C_in             <= C_in;
        s0_N_in             <= N_in;
        s0_Errval           <= Errval;
        s0_context_index    <= context_index_in;
        s0_strip_last_pixel <= strip_last_pixel_in;
        s0_qbpp             <= qbpp_in;
        s0_LIMIT            <= LIMIT_in;
        s0_NEAR             <= NEAR;
        s0_RESET            <= RESET;
      end

      // Accepted event parks in the skid slot behind a stalled stage 0.
      if (update_store_next) begin
        update_next_valid            <= 1'b1;
        update_next_A_in             <= A_in;
        update_next_B_in             <= B_in;
        update_next_C_in             <= C_in;
        update_next_N_in             <= N_in;
        update_next_Errval           <= Errval;
        update_next_context_index    <= context_index_in;
        update_next_strip_last_pixel <= strip_last_pixel_in;
        update_next_qbpp             <= qbpp_in;
        update_next_LIMIT            <= LIMIT_in;
        update_next_NEAR             <= NEAR;
        update_next_RESET            <= RESET;
      end
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
144
fpga/verilog/jls_error_mapper.sv
Normal file
144
fpga/verilog/jls_error_mapper.sv
Normal file
@@ -0,0 +1,144 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 prediction error encoding, Annex G.2 variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Map signed Errval into non-negative MErrval
|
||||
// Trace : docs/jls_traceability.md#golomb-rice-encoding
|
||||
// Example : Errval=-3 maps to MErrval=5; Errval=3 maps to MErrval=6.
|
||||
//
|
||||
// Registered error mapper. The upstream regular-mode stage computes Errval,
|
||||
// k, LIMIT, qbpp, and whether the context correction inverts Errval before
|
||||
// mapping. This module only performs the standard signed-to-unsigned mapping
|
||||
// and forwards coding parameters to jls_golomb_encoder.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_error_mapper (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous active-high reset.
  input var logic rst,

  // Upstream prediction-error event is valid.
  input var logic err_valid,

  // High when the output register can take the event presented with err_valid.
  output logic err_ready,

  // Signed prediction error, named after the standard variable Errval.
  input var logic signed [31:0] Errval,

  // High when Errval is bitwise inverted (~Errval) before the mapping.
  input var logic map_invert,

  // JPEG-LS Golomb parameter k, captured alongside the mapped value.
  input var logic [4:0] k,

  // JPEG-LS LIMIT parameter, captured alongside the mapped value.
  input var logic [6:0] limit,

  // JPEG-LS qbpp parameter, captured alongside the mapped value.
  input var logic [4:0] qbpp,

  // Marks the last pixel of the current strip frame.
  input var logic strip_last_pixel,

  // Registered mapped-error event is valid.
  output logic mapped_valid,

  // Downstream stage takes the registered event while this is high.
  input var logic mapped_ready,

  // Non-negative mapped error value (low 32 bits of the 33-bit result).
  output logic [31:0] MErrval,

  // Coding parameters registered together with MErrval.
  output logic [4:0] mapped_k,
  output logic [6:0] mapped_limit,
  output logic [4:0] mapped_qbpp,

  // Strip boundary flag registered together with MErrval.
  output logic mapped_strip_last_pixel
);

  // Mapping datapath. The source value is widened by one sign bit so that the
  // negation of the most negative 32-bit input is still representable.
  logic signed [31:0] src_errval;
  logic signed [32:0] src_errval_ext;
  logic               src_is_negative;
  logic signed [32:0] magnitude_ext;
  logic        [32:0] mapped_ext;

  // Handshake terms for the single output register slot.
  logic out_slot_free;
  logic take_err;

  // Signed-to-unsigned mapping: 2*|e| for e >= 0 and 2*|e| - 1 for e < 0,
  // where e is Errval, or ~Errval when map_invert is set.
  always_comb begin
    if (map_invert) begin
      src_errval = ~Errval;
    end else begin
      src_errval = Errval;
    end

    src_errval_ext = {src_errval[31], src_errval};

    if (src_errval_ext < 33'sd0) begin
      src_is_negative = 1'b1;
    end else begin
      src_is_negative = 1'b0;
    end

    if (src_is_negative) begin
      magnitude_ext = -src_errval_ext;
    end else begin
      magnitude_ext = src_errval_ext;
    end

    mapped_ext = magnitude_ext[32:0] << 1;
    if (src_is_negative) begin
      mapped_ext = mapped_ext - 33'd1;
    end
  end

  // The slot is free when it is empty or is being drained this cycle, which
  // lets a new event replace the old one back-to-back.
  always_comb begin
    if (!mapped_valid || mapped_ready) begin
      out_slot_free = 1'b1;
    end else begin
      out_slot_free = 1'b0;
    end

    err_ready = out_slot_free;

    if (err_valid && err_ready) begin
      take_err = 1'b1;
    end else begin
      take_err = 1'b0;
    end
  end

  // Output register: load on accept, otherwise clear valid once drained.
  always_ff @(posedge clk) begin
    if (rst) begin
      mapped_valid            <= 1'b0;
      MErrval                 <= 32'd0;
      mapped_k                <= 5'd0;
      mapped_limit            <= 7'd0;
      mapped_qbpp             <= 5'd0;
      mapped_strip_last_pixel <= 1'b0;
    end else if (take_err) begin
      mapped_valid            <= 1'b1;
      MErrval                 <= mapped_ext[31:0];
      mapped_k                <= k;
      mapped_limit            <= limit;
      mapped_qbpp             <= qbpp;
      mapped_strip_last_pixel <= strip_last_pixel;
    end else if (mapped_valid && mapped_ready) begin
      // Drained with nothing to replace it; payload registers keep their
      // previous contents.
      mapped_valid <= 1'b0;
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
371
fpga/verilog/jls_golomb_encoder.sv
Normal file
371
fpga/verilog/jls_golomb_encoder.sv
Normal file
@@ -0,0 +1,371 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 prediction error encoding, Annex G.2 variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Encode MErrval using k, LIMIT, and qbpp
|
||||
// Trace : docs/jls_traceability.md#golomb-rice-encoding
|
||||
// Example : MErrval=5, k=1 emits bits 0,0,1,1.
|
||||
//
|
||||
// Golomb code-event generator. This module starts from the standard mapped
|
||||
// error value MErrval and Golomb parameter k. Earlier pipeline stages are
|
||||
// responsible for computing Errval, MErrval, k, LIMIT, and qbpp from the
|
||||
// JPEG-LS context variables.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_golomb_encoder #(
  // Maximum code bits sent to jls_bit_packer in one event.
  parameter int MAX_CODE_BITS = 64
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous active-high reset.
  input var logic rst,

  // MErrval input event is valid.
  input var logic mapped_valid,

  // High only while the encoder is idle with no code event outstanding.
  output logic mapped_ready,

  // JPEG-LS mapped error value, named after the standard pseudocode variable.
  input var logic [31:0] MErrval,

  // JPEG-LS Golomb parameter k.
  input var logic [4:0] k,

  // JPEG-LS LIMIT parameter for the current coding mode.
  input var logic [6:0] limit,

  // JPEG-LS qbpp parameter for the current coding mode.
  input var logic [4:0] qbpp,

  // Last pixel of the current strip frame.
  input var logic mapped_strip_last_pixel,

  // A left-aligned code event is waiting on code_bits/code_bit_count.
  output logic code_valid,

  // Downstream takes the waiting code event while this is high.
  input var logic code_ready,

  // Left-aligned code bits. The first bit is code_bits[MAX_CODE_BITS-1].
  output logic [MAX_CODE_BITS-1:0] code_bits,

  // Number of valid bits in code_bits.
  output logic [6:0] code_bit_count,

  // One-cycle pulse when the current MErrval event has been fully emitted.
  output logic mapped_done,

  // Same pulse, asserted only for the last pixel of a strip frame.
  output logic mapped_last_done
);

  // One MErrval event walks the states in this order:
  //   IDLE -> PREP -> SELECT -> SUFFIX_PREP -> PREFIX -> [SUFFIX] -> DONE
  typedef enum logic [2:0] {
    STATE_IDLE        = 3'd0,
    STATE_PREP        = 3'd1,
    STATE_SUFFIX_PREP = 3'd2,
    STATE_PREFIX      = 3'd3,
    STATE_SUFFIX      = 3'd4,
    STATE_DONE        = 3'd5,
    STATE_SELECT      = 3'd6
  } golomb_state_e;

  // MAX_CODE_BITS narrowed to the width of the 7-bit length counters.
  localparam logic [6:0] MAX_CODE_BITS_VALUE = MAX_CODE_BITS;

  golomb_state_e state;

  // Inputs captured when the event is accepted.
  logic [31:0] held_MErrval;
  logic [31:0] held_MErrval_dec;
  logic [4:0]  held_k;
  logic [6:0]  held_limit;
  logic [4:0]  held_qbpp;
  logic        held_last_pixel;

  // Values registered in STATE_PREP for the compare/mux in STATE_SELECT.
  logic [31:0] quotient_q;
  logic [6:0]  unary_threshold_q;
  logic [6:0]  regular_len_q;
  logic [6:0]  limited_len_q;

  // Working registers for the active event.
  logic [6:0]  prefix_remaining;
  logic [31:0] suffix_base_bits;
  logic [31:0] suffix_bits;
  logic [6:0]  suffix_bit_count;
  logic        active_strip_last_pixel;

  // Describes the code event currently waiting on the output register; it is
  // consumed when code_ready takes that event.
  logic       pending_prefix_event;
  logic       pending_prefix_last;
  logic       pending_suffix_event;
  logic [6:0] pending_prefix_count;

  // Combinational terms.
  logic        accept_mapped;
  logic [6:0]  qbpp_ext;
  logic [31:0] quotient;
  logic [6:0]  unary_threshold;
  logic [6:0]  regular_len;
  logic [6:0]  limited_len;
  logic        quotient_overflows;
  logic        regular_path;
  logic [6:0]  chosen_prefix_len;
  logic [6:0]  chosen_suffix_len;
  logic [31:0] suffix_mask;
  logic [31:0] suffix_masked;
  logic [6:0]  chunk_len;
  logic        chunk_is_last;
  logic [MAX_CODE_BITS-1:0] prefix_chunk_bits;
  logic [MAX_CODE_BITS-1:0] suffix_chunk_bits;

  // Loop index declared outside procedural blocks per project coding style.
  integer bit_pos;

  // Input handshake: an event is taken only from IDLE with the output empty.
  always_comb begin
    if (state == STATE_IDLE && !code_valid) begin
      mapped_ready = 1'b1;
    end else begin
      mapped_ready = 1'b0;
    end

    if (mapped_valid && mapped_ready) begin
      accept_mapped = 1'b1;
    end else begin
      accept_mapped = 1'b0;
    end
  end

  // STATE_PREP candidates: MErrval >> k, the LIMIT - qbpp - 1 threshold, and
  // both possible prefix lengths. Subtractions are guarded so they cannot wrap.
  always_comb begin
    qbpp_ext    = {2'b00, held_qbpp};
    quotient    = held_MErrval >> held_k;
    regular_len = quotient[6:0] + 7'd1;

    if (held_limit > (qbpp_ext + 7'd1)) begin
      unary_threshold = held_limit - qbpp_ext - 7'd1;
    end else begin
      unary_threshold = 7'd0;
    end

    if (held_limit > qbpp_ext) begin
      limited_len = held_limit - qbpp_ext;
    end else begin
      limited_len = 7'd1;
    end
  end

  // STATE_SELECT decision on the registered candidates. The 32-bit compare
  // against the threshold is split into an upper-bit zero test plus a 7-bit
  // compare.
  always_comb begin
    if (quotient_q[31:7] != 25'd0) begin
      quotient_overflows = 1'b1;
    end else begin
      quotient_overflows = 1'b0;
    end

    if (!quotient_overflows &&
        quotient_q[6:0] < unary_threshold_q) begin
      regular_path = 1'b1;
    end else begin
      regular_path = 1'b0;
    end

    if (regular_path) begin
      chosen_prefix_len = regular_len_q;
      chosen_suffix_len = {2'b00, held_k};
    end else begin
      chosen_prefix_len = limited_len_q;
      chosen_suffix_len = {2'b00, held_qbpp};
    end
  end

  // STATE_SUFFIX_PREP: keep only the low suffix_bit_count bits of the base.
  always_comb begin
    if (suffix_bit_count != 7'd0) begin
      suffix_mask = (32'd1 << suffix_bit_count[4:0]) - 32'd1;
    end else begin
      suffix_mask = 32'd0;
    end

    suffix_masked = suffix_base_bits & suffix_mask;
  end

  // Prefix chunking: at most MAX_CODE_BITS bits per event. Only the final
  // chunk carries the terminating 1, placed right after its leading zeros.
  always_comb begin
    if (prefix_remaining > MAX_CODE_BITS_VALUE) begin
      chunk_len     = MAX_CODE_BITS_VALUE;
      chunk_is_last = 1'b0;
    end else begin
      chunk_len     = prefix_remaining;
      chunk_is_last = 1'b1;
    end

    prefix_chunk_bits = {MAX_CODE_BITS{1'b0}};
    if (chunk_is_last && chunk_len != 7'd0) begin
      prefix_chunk_bits[MAX_CODE_BITS_VALUE - chunk_len] = 1'b1;
    end
  end

  // Suffix event: copy the suffix MSB-first into the top of the code word.
  always_comb begin
    suffix_chunk_bits = {MAX_CODE_BITS{1'b0}};
    for (bit_pos = 0; bit_pos < MAX_CODE_BITS; bit_pos = bit_pos + 1) begin
      if (bit_pos < suffix_bit_count) begin
        suffix_chunk_bits[MAX_CODE_BITS - 1 - bit_pos] =
            suffix_bits[suffix_bit_count - 7'd1 - bit_pos[6:0]];
      end
    end
  end

  always_ff @(posedge clk) begin
    if (rst) begin
      state                   <= STATE_IDLE;
      held_MErrval            <= 32'd0;
      held_MErrval_dec        <= 32'd0;
      held_k                  <= 5'd0;
      held_limit              <= 7'd0;
      held_qbpp               <= 5'd0;
      held_last_pixel         <= 1'b0;
      quotient_q              <= 32'd0;
      unary_threshold_q       <= 7'd0;
      regular_len_q           <= 7'd0;
      limited_len_q           <= 7'd0;
      prefix_remaining        <= 7'd0;
      suffix_base_bits        <= 32'd0;
      suffix_bits             <= 32'd0;
      suffix_bit_count        <= 7'd0;
      active_strip_last_pixel <= 1'b0;
      pending_prefix_event    <= 1'b0;
      pending_prefix_last     <= 1'b0;
      pending_suffix_event    <= 1'b0;
      pending_prefix_count    <= 7'd0;
      code_valid              <= 1'b0;
      code_bits               <= {MAX_CODE_BITS{1'b0}};
      code_bit_count          <= 7'd0;
      mapped_done             <= 1'b0;
      mapped_last_done        <= 1'b0;
    end else begin
      // Completion flags are single-cycle pulses.
      mapped_done      <= 1'b0;
      mapped_last_done <= 1'b0;

      if (code_valid) begin
        // An event is waiting on the output. Nothing else advances until it
        // is taken, and mapped_ready is low so no new input can arrive.
        if (code_ready) begin
          code_valid     <= 1'b0;
          code_bits      <= {MAX_CODE_BITS{1'b0}};
          code_bit_count <= 7'd0;

          pending_prefix_event <= 1'b0;
          pending_prefix_last  <= 1'b0;
          pending_suffix_event <= 1'b0;
          pending_prefix_count <= 7'd0;

          if (pending_prefix_event) begin
            prefix_remaining <= prefix_remaining - pending_prefix_count;
          end

          if (pending_suffix_event) begin
            state <= STATE_DONE;
          end else if (pending_prefix_event && pending_prefix_last) begin
            if (suffix_bit_count != 7'd0) begin
              state <= STATE_SUFFIX;
            end else begin
              state <= STATE_DONE;
            end
          end
        end
      end else if (accept_mapped) begin
        held_MErrval     <= MErrval;
        held_MErrval_dec <= MErrval - 32'd1;
        held_k           <= k;
        held_limit       <= limit;
        held_qbpp        <= qbpp;
        held_last_pixel  <= mapped_strip_last_pixel;
        state            <= STATE_PREP;
      end else begin
        case (state)
          STATE_PREP: begin
            // Register the shift result, threshold and both candidate prefix
            // lengths ahead of the compare/mux in STATE_SELECT.
            quotient_q              <= quotient;
            unary_threshold_q       <= unary_threshold;
            regular_len_q           <= regular_len;
            limited_len_q           <= limited_len;
            active_strip_last_pixel <= held_last_pixel;
            state                   <= STATE_SELECT;
          end

          STATE_SELECT: begin
            // Regular path: suffix is the low k bits of MErrval.
            // Limited path: suffix is the low qbpp bits of MErrval - 1.
            prefix_remaining <= chosen_prefix_len;
            suffix_bit_count <= chosen_suffix_len;
            if (regular_path) begin
              suffix_base_bits <= held_MErrval;
            end else begin
              suffix_base_bits <= held_MErrval_dec;
            end
            state <= STATE_SUFFIX_PREP;
          end

          STATE_SUFFIX_PREP: begin
            suffix_bits <= suffix_masked;
            state       <= STATE_PREFIX;
          end

          STATE_PREFIX: begin
            // The state only changes when the emitted chunk is taken.
            if (prefix_remaining != 7'd0) begin
              code_valid           <= 1'b1;
              code_bits            <= prefix_chunk_bits;
              code_bit_count       <= chunk_len;
              pending_prefix_event <= 1'b1;
              pending_prefix_last  <= chunk_is_last;
              pending_prefix_count <= chunk_len;
            end
          end

          STATE_SUFFIX: begin
            if (suffix_bit_count != 7'd0) begin
              code_valid           <= 1'b1;
              code_bits            <= suffix_chunk_bits;
              code_bit_count       <= suffix_bit_count;
              pending_suffix_event <= 1'b1;
            end else begin
              state <= STATE_DONE;
            end
          end

          STATE_DONE: begin
            mapped_done             <= 1'b1;
            mapped_last_done        <= active_strip_last_pixel;
            active_strip_last_pixel <= 1'b0;
            state                   <= STATE_IDLE;
          end

          default: begin
            // STATE_IDLE and the unused encoding both settle in IDLE.
            state <= STATE_IDLE;
          end
        endcase
      end
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
442
fpga/verilog/jls_header_writer.sv
Normal file
442
fpga/verilog/jls_header_writer.sv
Normal file
@@ -0,0 +1,442 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.2.2 frame header, C.2.3 scan header, C.2.4.1 LSE
|
||||
// Figure : N/A
|
||||
// Table : Table C.1 preset parameters, Table C.2 RESET, Table C.3 defaults
|
||||
// Pseudocode : JPEG-LS marker segment emission before and after one scan
|
||||
// Trace : docs/jls_traceability.md#jls-header-markers
|
||||
// Example : For PIX_WIDTH=8 and width=32, SOF55 emits P=8, Y=16, X=32.
|
||||
//
|
||||
// Header writer for one standalone grayscale JPEG-LS strip frame. A strip
|
||||
// start command emits SOI, SOF55, LSE preset coding parameters, and SOS. A
|
||||
// strip finish command emits EOI after the entropy payload has been flushed by
|
||||
// the bit packer.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_header_writer #(
  // Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous active-high reset.
  input var logic rst,

  // Start command for one standalone strip frame.
  input var logic strip_start_valid,

  // High while idle: a strip start command can be accepted.
  output logic strip_start_ready,

  // Marks the first strip of an original input image for ofifo_wdata[8].
  input var logic original_image_first_strip,

  // JPEG-LS frame width written to SOF55.X.
  input var logic [12:0] strip_width,

  // JPEG-LS frame height written to SOF55.Y.
  input var logic [12:0] strip_height,

  // NEAR parameter written to the JPEG-LS SOS segment.
  input var logic [5:0] near,

  // JPEG-LS LSE preset coding parameters, written in this order.
  input var logic [15:0] preset_maxval,
  input var logic [15:0] preset_t1,
  input var logic [15:0] preset_t2,
  input var logic [15:0] preset_t3,
  input var logic [15:0] preset_reset,

  // Finish command after the strip entropy payload has been byte-flushed.
  input var logic strip_finish_valid,

  // High while idle and no start command is being presented.
  output logic strip_finish_ready,

  // Encoded marker byte is valid.
  output logic byte_valid,

  // Downstream byte buffer can accept the marker byte.
  input var logic byte_ready,

  // Encoded marker byte in JPEG marker-stream order.
  output logic [7:0] byte_data,

  // Sideband for the first SOI byte of an original input image.
  output logic original_image_start,

  // One-cycle pulse after the last SOS byte is accepted.
  output logic header_done,

  // One-cycle pulse after the last EOI byte is accepted.
  output logic eoi_done
);

  import jls_common_pkg::*;

  // Header stream is SOI(2) + SOF55(13) + LSE(15) + SOS(10) = 40 bytes.
  localparam logic [5:0] HEADER_LAST_INDEX = 6'd39;

  // EOI stream is the two bytes FF D9.
  localparam logic [1:0] EOI_LAST_INDEX = 2'd1;

  typedef enum logic [1:0] {
    STATE_IDLE   = 2'd0,
    STATE_HEADER = 2'd1,
    STATE_EOI    = 2'd2
  } header_state_e;

  // Sequencer state and its next value.
  header_state_e state;
  header_state_e state_next;

  // Index of the byte currently held in byte_data, per sequence.
  logic [5:0] header_index;
  logic [5:0] header_index_next;
  logic [1:0] eoi_index;
  logic [1:0] eoi_index_next;

  // Command fields captured when a start command is accepted.
  logic        latched_original_image_first_strip;
  logic [12:0] latched_strip_width;
  logic [12:0] latched_strip_height;
  logic [5:0]  latched_near;
  logic [15:0] latched_preset_maxval;
  logic [15:0] latched_preset_t1;
  logic [15:0] latched_preset_t2;
  logic [15:0] latched_preset_t3;
  logic [15:0] latched_preset_reset;

  // Handshake terms and next values of the registered outputs.
  logic       accept_start;
  logic       accept_finish;
  logic       output_fire;
  logic       byte_valid_next;
  logic [7:0] byte_data_next;
  logic       original_image_start_next;

  // Lookup index of the byte that will be registered next, and that byte.
  logic [5:0] header_byte_index;
  logic [1:0] eoi_byte_index;
  logic [7:0] header_byte;
  logic [7:0] eoi_byte;

  // Command and output handshakes. A start command has priority: finish is
  // only ready when no start is being presented in the same cycle.
  always_comb begin
    if (state == STATE_IDLE) begin
      strip_start_ready = 1'b1;
      if (!strip_start_valid) begin
        strip_finish_ready = 1'b1;
      end else begin
        strip_finish_ready = 1'b0;
      end
    end else begin
      strip_start_ready  = 1'b0;
      strip_finish_ready = 1'b0;
    end

    if (strip_start_valid && strip_start_ready) begin
      accept_start = 1'b1;
    end else begin
      accept_start = 1'b0;
    end

    if (strip_finish_valid && strip_finish_ready) begin
      accept_finish = 1'b1;
    end else begin
      accept_finish = 1'b0;
    end

    if (byte_valid && byte_ready) begin
      output_fire = 1'b1;
    end else begin
      output_fire = 1'b0;
    end
  end

  // Look ahead by one byte so the lookup result can be registered directly:
  // index 0 when a command is accepted, current index + 1 when a non-final
  // byte is taken, otherwise the current index.
  always_comb begin
    if (state == STATE_IDLE && accept_start) begin
      header_byte_index = 6'd0;
    end else if (state == STATE_HEADER && output_fire && header_index != HEADER_LAST_INDEX) begin
      header_byte_index = header_index + 6'd1;
    end else begin
      header_byte_index = header_index;
    end

    if (state == STATE_IDLE && accept_finish) begin
      eoi_byte_index = 2'd0;
    end else if (state == STATE_EOI && output_fire && eoi_index != EOI_LAST_INDEX) begin
      eoi_byte_index = eoi_index + 2'd1;
    end else begin
      eoi_byte_index = eoi_index;
    end
  end

  // Header byte lookup. Variable fields are sent high byte first; the 13-bit
  // dimensions and 6-bit NEAR are zero-extended to whole bytes.
  always_comb begin
    case (header_byte_index)
      // SOI: T.87 C.1 uses JPEG marker syntax from T.81 B.1.
      6'd0:  header_byte = JLS_MARKER_PREFIX;
      6'd1:  header_byte = JLS_MARKER_SOI;

      // SOF55: T.87 C.2.2, one grayscale component.
      6'd2:  header_byte = JLS_MARKER_PREFIX;
      6'd3:  header_byte = JLS_MARKER_SOF55;
      6'd4:  header_byte = 8'h00;
      6'd5:  header_byte = 8'h0B;
      6'd6:  header_byte = PIX_WIDTH[7:0];
      6'd7:  header_byte = {3'b000, latched_strip_height[12:8]};
      6'd8:  header_byte = latched_strip_height[7:0];
      6'd9:  header_byte = {3'b000, latched_strip_width[12:8]};
      6'd10: header_byte = latched_strip_width[7:0];
      6'd11: header_byte = 8'h01;
      6'd12: header_byte = 8'h01;
      6'd13: header_byte = 8'h11;
      6'd14: header_byte = 8'h00;

      // LSE preset coding parameters: T.87 C.2.4.1.1, type 1.
      6'd15: header_byte = JLS_MARKER_PREFIX;
      6'd16: header_byte = JLS_MARKER_LSE;
      6'd17: header_byte = 8'h00;
      6'd18: header_byte = 8'h0D;
      6'd19: header_byte = 8'h01;
      6'd20: header_byte = latched_preset_maxval[15:8];
      6'd21: header_byte = latched_preset_maxval[7:0];
      6'd22: header_byte = latched_preset_t1[15:8];
      6'd23: header_byte = latched_preset_t1[7:0];
      6'd24: header_byte = latched_preset_t2[15:8];
      6'd25: header_byte = latched_preset_t2[7:0];
      6'd26: header_byte = latched_preset_t3[15:8];
      6'd27: header_byte = latched_preset_t3[7:0];
      6'd28: header_byte = latched_preset_reset[15:8];
      6'd29: header_byte = latched_preset_reset[7:0];

      // SOS: T.87 C.2.3, one component, no mapping table, ILV=0.
      6'd30: header_byte = JLS_MARKER_PREFIX;
      6'd31: header_byte = JLS_MARKER_SOS;
      6'd32: header_byte = 8'h00;
      6'd33: header_byte = 8'h08;
      6'd34: header_byte = 8'h01;
      6'd35: header_byte = 8'h01;
      6'd36: header_byte = 8'h00;
      6'd37: header_byte = {2'b00, latched_near};
      6'd38: header_byte = 8'h00;
      6'd39: header_byte = 8'h00;

      default: header_byte = 8'h00;
    endcase
  end

  // EOI byte lookup.
  always_comb begin
    case (eoi_byte_index)
      2'd0:    eoi_byte = JLS_MARKER_PREFIX;
      2'd1:    eoi_byte = JLS_MARKER_EOI;
      default: eoi_byte = 8'h00;
    endcase
  end

  // Next-state logic for the sequencer and the registered byte output. All
  // registers hold their value unless a branch below overrides them.
  always_comb begin
    state_next                = state;
    header_index_next         = header_index;
    eoi_index_next            = eoi_index;
    byte_valid_next           = byte_valid;
    byte_data_next            = byte_data;
    original_image_start_next = original_image_start;

    case (state)
      STATE_IDLE: begin
        header_index_next = 6'd0;
        eoi_index_next    = 2'd0;

        if (accept_start) begin
          // First SOI byte; it alone may carry the image-start sideband.
          state_next                = STATE_HEADER;
          byte_valid_next           = 1'b1;
          byte_data_next            = header_byte;
          original_image_start_next = original_image_first_strip;
        end else if (accept_finish) begin
          state_next                = STATE_EOI;
          byte_valid_next           = 1'b1;
          byte_data_next            = eoi_byte;
          original_image_start_next = 1'b0;
        end else begin
          byte_valid_next           = 1'b0;
          byte_data_next            = 8'h00;
          original_image_start_next = 1'b0;
        end
      end

      STATE_HEADER: begin
        if (output_fire) begin
          if (header_index == HEADER_LAST_INDEX) begin
            state_next                = STATE_IDLE;
            header_index_next         = 6'd0;
            byte_valid_next           = 1'b0;
            byte_data_next            = 8'h00;
            original_image_start_next = 1'b0;
          end else begin
            header_index_next = header_index + 6'd1;
            byte_valid_next   = 1'b1;
            byte_data_next    = header_byte;
            // NOTE(review): header_byte_index is header_index + 1 on this
            // path, so the index-0 test looks unreachable while header_index
            // stays within 0..39 and the sideband is cleared here; confirm.
            if (header_byte_index == 6'd0 && latched_original_image_first_strip) begin
              original_image_start_next = 1'b1;
            end else begin
              original_image_start_next = 1'b0;
            end
          end
        end
      end

      STATE_EOI: begin
        if (output_fire) begin
          original_image_start_next = 1'b0;
          if (eoi_index == EOI_LAST_INDEX) begin
            state_next      = STATE_IDLE;
            eoi_index_next  = 2'd0;
            byte_valid_next = 1'b0;
            byte_data_next  = 8'h00;
          end else begin
            eoi_index_next  = eoi_index + 2'd1;
            byte_valid_next = 1'b1;
            byte_data_next  = eoi_byte;
          end
        end
      end

      default: begin
        state_next                = STATE_IDLE;
        header_index_next         = 6'd0;
        eoi_index_next            = 2'd0;
        byte_valid_next           = 1'b0;
        byte_data_next            = 8'h00;
        original_image_start_next = 1'b0;
      end
    endcase
  end

  always_ff @(posedge clk) begin
    if (rst) begin
      state                              <= STATE_IDLE;
      header_index                       <= 6'd0;
      eoi_index                          <= 2'd0;
      byte_valid                         <= 1'b0;
      byte_data                          <= 8'h00;
      original_image_start               <= 1'b0;
      header_done                        <= 1'b0;
      eoi_done                           <= 1'b0;
      latched_original_image_first_strip <= 1'b0;
      latched_strip_width                <= 13'd0;
      latched_strip_height               <= 13'd0;
      latched_near                       <= 6'd0;
      latched_preset_maxval              <= 16'd0;
      latched_preset_t1                  <= 16'd0;
      latched_preset_t2                  <= 16'd0;
      latched_preset_t3                  <= 16'd0;
      latched_preset_reset               <= 16'd0;
    end else begin
      state                <= state_next;
      header_index         <= header_index_next;
      eoi_index            <= eoi_index_next;
      byte_valid           <= byte_valid_next;
      byte_data            <= byte_data_next;
      original_image_start <= original_image_start_next;

      // Completion pulses fire the cycle after the final byte is taken.
      if (state == STATE_HEADER && output_fire && header_index == HEADER_LAST_INDEX) begin
        header_done <= 1'b1;
      end else begin
        header_done <= 1'b0;
      end

      if (state == STATE_EOI && output_fire && eoi_index == EOI_LAST_INDEX) begin
        eoi_done <= 1'b1;
      end else begin
        eoi_done <= 1'b0;
      end

      // Capture the command fields; the lookup first needs them at index 6,
      // well after this register update.
      if (accept_start) begin
        latched_original_image_first_strip <= original_image_first_strip;
        latched_strip_width                <= strip_width;
        latched_strip_height               <= strip_height;
        latched_near                       <= near;
        latched_preset_maxval              <= preset_maxval;
        latched_preset_t1                  <= preset_t1;
        latched_preset_t2                  <= preset_t2;
        latched_preset_t3                  <= preset_t3;
        latched_preset_reset               <= preset_reset;
      end
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
354
fpga/verilog/jls_input_ctrl.sv
Normal file
354
fpga/verilog/jls_input_ctrl.sv
Normal file
@@ -0,0 +1,354 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.8 Control procedure, Annex D.1-D.3 scan control
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Source image sample ordering before JPEG-LS encoding
|
||||
// Example : See docs/jls_module_interfaces.md
|
||||
//
|
||||
// Input controller for the JPEG-LS encoder. This module aligns the standard
|
||||
// synchronous FIFO read latency, waits for the original-image SOF sideband,
|
||||
// samples runtime configuration at the SOF pixel, and emits one registered
|
||||
// pixel event at a time for the downstream scan controller.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_input_ctrl #(
  // Grayscale sample precision, fixed at elaboration. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16,

  // Image width substituted when the runtime width/height pair fails validation.
  parameter int DEFAULT_PIC_COL = 6144,

  // Image height substituted when the runtime width/height pair fails validation.
  parameter int DEFAULT_PIC_ROW = 256,

  // Largest runtime image width accepted by the validation check.
  parameter int MAX_PIC_COL = 6144,

  // Largest runtime image height accepted by the validation check.
  parameter int MAX_PIC_ROW = 4096,

  // Rows of the original image that make up one standalone JPEG-LS strip frame.
  parameter int SCAN_ROWS = 16,

  // Packed input FIFO word width: nine bits (byte plus SOF sideband) per byte lane.
  parameter int IFIFO_DATA_WIDTH = ((PIX_WIDTH + 7) / 8) * 9
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous, active-high reset.
  input var logic rst,

  // Requested image width; latched together with the first SOF pixel.
  input var logic [12:0] cfg_pic_col,

  // Requested image height; latched together with the first SOF pixel.
  input var logic [12:0] cfg_pic_row,

  // Compression-ratio selector; latched together with the first SOF pixel.
  input var logic [3:0] ratio,

  // Input FIFO read clock. Driven directly from clk.
  output logic ififo_rclk,

  // Input FIFO read strobe. The addressed word is consumed on the following clk cycle.
  output logic ififo_rd,

  // Packed FIFO word carrying the SOF flag and the grayscale sample.
  input var logic [IFIFO_DATA_WIDTH-1:0] ififo_rdata,

  // Input FIFO empty flag.
  input var logic ififo_empty,

  // Input FIFO almost-empty flag.
  input var logic ififo_alempty,

  // Downstream stage accepts the registered pixel event this cycle.
  input var logic pixel_ready,

  // Pause request; suppresses new FIFO reads while asserted.
  input var logic pause_req,

  // Registered pixel event is valid.
  output logic pixel_valid,

  // SOF sideband of the FIFO word that produced the pixel event.
  output logic pixel_sof,

  // Grayscale sample of the FIFO word that produced the pixel event.
  output logic [PIX_WIDTH-1:0] pixel_sample,

  // Zero-based column of the pixel inside the original image.
  output logic [12:0] pixel_x,

  // Zero-based row of the pixel inside the original image.
  output logic [12:0] pixel_y,

  // Pixel is the first one of its strip frame.
  output logic strip_first_pixel,

  // Pixel is the last one of its strip frame.
  output logic strip_last_pixel,

  // Pixel is the first one of the original image.
  output logic image_first_pixel,

  // Pixel is the last one of the original image.
  output logic image_last_pixel,

  // Image width in effect for the current image (validated value or fallback).
  output logic [12:0] active_pic_col,

  // Image height in effect for the current image (validated value or fallback).
  output logic [12:0] active_pic_row,

  // Ratio selector latched for the current image.
  output logic [3:0] active_ratio,

  // Set when cfg_pic_col/cfg_pic_row passed validation at the SOF pixel.
  output logic active_cfg_valid,

  // Set from the SOF pixel until the last pixel of the image has been taken.
  output logic image_active
);

  // ---------------------------------------------------------------------
  // Constants
  // ---------------------------------------------------------------------

  // Bit position of the SOF flag that is examined in the FIFO word.
  localparam int SOF_BIT_INDEX = (PIX_WIDTH == 8) ? 8 : 17;

  // 13-bit forms of the limits, matching the width of the coordinate registers.
  localparam logic [12:0] MIN_PIC_COL_VALUE     = 13'd16;
  localparam logic [12:0] MIN_PIC_ROW_VALUE     = 13'd16;
  localparam logic [12:0] DEFAULT_PIC_COL_VALUE = DEFAULT_PIC_COL;
  localparam logic [12:0] DEFAULT_PIC_ROW_VALUE = DEFAULT_PIC_ROW;
  localparam logic [12:0] MAX_PIC_COL_VALUE     = MAX_PIC_COL;
  localparam logic [12:0] MAX_PIC_ROW_VALUE     = MAX_PIC_ROW;
  localparam logic [12:0] SCAN_ROWS_VALUE       = SCAN_ROWS;
  localparam logic [12:0] SCAN_ROWS_LAST_VALUE  = SCAN_ROWS - 1;

  // ---------------------------------------------------------------------
  // Internal signals
  // ---------------------------------------------------------------------

  // Set for the cycle after a read strobe: the FIFO word is consumed now.
  logic rd_pending;

  // Coordinates that the next in-image FIFO word will receive.
  logic [12:0] x_count;
  logic [12:0] y_count;
  logic [12:0] strip_row_count;

  // Runtime configuration checks and the resulting width/height selection.
  logic        cfg_col_in_range;
  logic        cfg_row_in_range;
  logic        cfg_row_aligned;
  logic        cfg_dimension_valid;
  logic [12:0] cfg_pic_col_selected;
  logic [12:0] cfg_pic_row_selected;

  // Read-request terms.
  logic fifo_read_allowed;
  logic pixel_slot_open;
  logic issue_read;

  // Fields sliced out of the FIFO word.
  logic                 fifo_word_sof;
  logic [PIX_WIDTH-1:0] fifo_word_sample;

  // Position flags for the coordinate held in x_count/y_count/strip_row_count.
  logic [12:0] active_pic_col_last;
  logic [12:0] active_pic_row_last;
  logic        x_is_first;
  logic        x_is_last;
  logic        strip_row_is_first;
  logic        y_is_last;
  logic        strip_row_is_last;
  logic        strip_first_active_pixel;

  // Classification of the FIFO word that is being consumed this cycle.
  logic word_opens_image;  // SOF word seen while no image is in progress
  logic word_in_image;     // any word seen while an image is in progress

  // ---------------------------------------------------------------------
  // FIFO interface wiring
  // ---------------------------------------------------------------------

  assign ififo_rclk       = clk;
  assign ififo_rd         = issue_read;
  assign fifo_word_sof    = ififo_rdata[SOF_BIT_INDEX];
  assign fifo_word_sample = ififo_rdata[PIX_WIDTH-1:0];

  // ---------------------------------------------------------------------
  // Runtime configuration validation
  // ---------------------------------------------------------------------

  // Width and height must lie inside [MIN, MAX] and the height must be a whole
  // number of strips; otherwise both dimensions fall back to the defaults.
  always_comb begin
    if (cfg_pic_col >= MIN_PIC_COL_VALUE && cfg_pic_col <= MAX_PIC_COL_VALUE) begin
      cfg_col_in_range = 1'b1;
    end else begin
      cfg_col_in_range = 1'b0;
    end

    if (cfg_pic_row >= MIN_PIC_ROW_VALUE && cfg_pic_row <= MAX_PIC_ROW_VALUE) begin
      cfg_row_in_range = 1'b1;
    end else begin
      cfg_row_in_range = 1'b0;
    end

    if ((cfg_pic_row % SCAN_ROWS_VALUE) == 13'd0) begin
      cfg_row_aligned = 1'b1;
    end else begin
      cfg_row_aligned = 1'b0;
    end

    if (cfg_col_in_range && cfg_row_in_range && cfg_row_aligned) begin
      cfg_dimension_valid = 1'b1;
    end else begin
      cfg_dimension_valid = 1'b0;
    end

    if (cfg_dimension_valid) begin
      cfg_pic_col_selected = cfg_pic_col;
      cfg_pic_row_selected = cfg_pic_row;
    end else begin
      cfg_pic_col_selected = DEFAULT_PIC_COL_VALUE;
      cfg_pic_row_selected = DEFAULT_PIC_ROW_VALUE;
    end
  end

  // ---------------------------------------------------------------------
  // FIFO read request
  // ---------------------------------------------------------------------

  // A read is issued only when the FIFO reports data, the output register is
  // free (or being drained this cycle), no earlier read is still in flight and
  // no pause is requested. Because rd_pending blocks the request, read strobes
  // are never asserted on two consecutive cycles.
  always_comb begin
    if (!ififo_alempty || !ififo_empty) begin
      fifo_read_allowed = 1'b1;
    end else begin
      fifo_read_allowed = 1'b0;
    end

    if (!pixel_valid || pixel_ready) begin
      pixel_slot_open = 1'b1;
    end else begin
      pixel_slot_open = 1'b0;
    end

    if (fifo_read_allowed && pixel_slot_open && !rd_pending && !pause_req) begin
      issue_read = 1'b1;
    end else begin
      issue_read = 1'b0;
    end
  end

  // ---------------------------------------------------------------------
  // Coordinate boundary decode
  // ---------------------------------------------------------------------

  assign active_pic_col_last = active_pic_col - 13'd1;
  assign active_pic_row_last = active_pic_row - 13'd1;

  always_comb begin
    if (x_count == 13'd0) begin
      x_is_first = 1'b1;
    end else begin
      x_is_first = 1'b0;
    end

    if (x_count == active_pic_col_last) begin
      x_is_last = 1'b1;
    end else begin
      x_is_last = 1'b0;
    end

    if (y_count == active_pic_row_last) begin
      y_is_last = 1'b1;
    end else begin
      y_is_last = 1'b0;
    end

    if (strip_row_count == 13'd0) begin
      strip_row_is_first = 1'b1;
    end else begin
      strip_row_is_first = 1'b0;
    end

    if (strip_row_count == SCAN_ROWS_LAST_VALUE) begin
      strip_row_is_last = 1'b1;
    end else begin
      strip_row_is_last = 1'b0;
    end

    if (x_is_first && strip_row_is_first) begin
      strip_first_active_pixel = 1'b1;
    end else begin
      strip_first_active_pixel = 1'b0;
    end
  end

  // ---------------------------------------------------------------------
  // Classification of the word being consumed
  // ---------------------------------------------------------------------

  // At most one of the two strobes is set. A word that arrives while no image
  // is active and that carries no SOF flag sets neither strobe and is dropped.
  always_comb begin
    word_opens_image = 1'b0;
    word_in_image    = 1'b0;
    if (rd_pending) begin
      if (!image_active && fifo_word_sof) begin
        word_opens_image = 1'b1;
      end else if (image_active) begin
        word_in_image = 1'b1;
      end
    end
  end

  // ---------------------------------------------------------------------
  // Read-latency marker
  // ---------------------------------------------------------------------

  always_ff @(posedge clk) begin
    if (rst) begin
      rd_pending <= 1'b0;
    end else begin
      rd_pending <= issue_read;
    end
  end

  // ---------------------------------------------------------------------
  // Registered pixel event
  // ---------------------------------------------------------------------

  always_ff @(posedge clk) begin
    if (rst) begin
      pixel_valid       <= 1'b0;
      pixel_sof         <= 1'b0;
      pixel_sample      <= {PIX_WIDTH{1'b0}};
      pixel_x           <= 13'd0;
      pixel_y           <= 13'd0;
      strip_first_pixel <= 1'b0;
      strip_last_pixel  <= 1'b0;
      image_first_pixel <= 1'b0;
      image_last_pixel  <= 1'b0;
    end else begin
      // Drain first; a word consumed in the same cycle re-arms valid below.
      if (pixel_valid && pixel_ready) begin
        pixel_valid <= 1'b0;
      end

      if (word_opens_image) begin
        // The SOF word is always pixel (0, 0) and opens both image and strip.
        pixel_valid       <= 1'b1;
        pixel_sof         <= fifo_word_sof;
        pixel_sample      <= fifo_word_sample;
        pixel_x           <= 13'd0;
        pixel_y           <= 13'd0;
        strip_first_pixel <= 1'b1;
        strip_last_pixel  <= 1'b0;
        image_first_pixel <= 1'b1;
        image_last_pixel  <= 1'b0;
      end else if (word_in_image) begin
        pixel_valid       <= 1'b1;
        pixel_sof         <= fifo_word_sof;
        pixel_sample      <= fifo_word_sample;
        pixel_x           <= x_count;
        pixel_y           <= y_count;
        strip_first_pixel <= strip_first_active_pixel;
        strip_last_pixel  <= x_is_last && strip_row_is_last;
        image_first_pixel <= 1'b0;
        image_last_pixel  <= x_is_last && y_is_last;
      end
    end
  end

  // ---------------------------------------------------------------------
  // Per-image configuration latch
  // ---------------------------------------------------------------------

  always_ff @(posedge clk) begin
    if (rst) begin
      active_pic_col   <= DEFAULT_PIC_COL_VALUE;
      active_pic_row   <= DEFAULT_PIC_ROW_VALUE;
      active_ratio     <= 4'd0;
      active_cfg_valid <= 1'b0;
    end else if (word_opens_image) begin
      active_pic_col   <= cfg_pic_col_selected;
      active_pic_row   <= cfg_pic_row_selected;
      active_ratio     <= ratio;
      active_cfg_valid <= cfg_dimension_valid;
    end
  end

  // ---------------------------------------------------------------------
  // Image-active flag and coordinate counters
  // ---------------------------------------------------------------------

  always_ff @(posedge clk) begin
    if (rst) begin
      image_active    <= 1'b0;
      x_count         <= 13'd0;
      y_count         <= 13'd0;
      strip_row_count <= 13'd0;
    end else if (word_opens_image) begin
      // Pixel (0, 0) has just been taken, so the next word is column 1.
      image_active    <= 1'b1;
      x_count         <= 13'd1;
      y_count         <= 13'd0;
      strip_row_count <= 13'd0;
    end else if (word_in_image) begin
      if (x_is_last) begin
        x_count <= 13'd0;
        if (y_is_last) begin
          // Last pixel of the image: return to the idle state.
          y_count         <= 13'd0;
          strip_row_count <= 13'd0;
          image_active    <= 1'b0;
        end else begin
          y_count <= y_count + 13'd1;
          if (strip_row_is_last) begin
            strip_row_count <= 13'd0;
          end else begin
            strip_row_count <= strip_row_count + 13'd1;
          end
        end
      end else begin
        x_count <= x_count + 13'd1;
      end
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
876
fpga/verilog/jls_mode_router.sv
Normal file
876
fpga/verilog/jls_mode_router.sv
Normal file
@@ -0,0 +1,876 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.3 context determination, Annex A.7 run mode
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Select regular mode or run mode from local gradients
|
||||
// Trace : docs/jls_traceability.md#run-mode
|
||||
// Example : When D1=D2=D3=0 and X=Ra, a run pixel is reconstructed as Ra
|
||||
// and the accumulated run_length is not entropy coded until the
|
||||
// run reaches EOL or an interruption sample.
|
||||
//
|
||||
// First-pass mode router and run scanner. It consumes neighbor events from
|
||||
// jls_neighbor_provider, sends non-run contexts to the regular pipeline, and
|
||||
// accumulates run pixels for jls_run_mode segment encoding. To preserve
|
||||
// entropy order in the later top-level integration, this module stalls regular,
|
||||
// interruption, and EOL segment emission behind an outstanding run segment. It
|
||||
// may still accept later non-EOL matching run pixels because those pixels only
|
||||
// update run_length_accum and reconstructed history; they do not emit entropy
|
||||
// until a later segment boundary.
|
||||
// Once run_length_accum is non-zero, the scanner remains in the standard
|
||||
// Annex A.7 run loop; the next pixel is judged against RUNval/Ra and EOL, not
|
||||
// reclassified by the regular-mode gradient context.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_mode_router #(
|
||||
// Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
|
||||
parameter int PIX_WIDTH = 16
|
||||
) (
|
||||
// Main 250 MHz clock.
|
||||
input var logic clk,
|
||||
|
||||
// Synchronous active-high reset.
|
||||
input var logic rst,
|
||||
|
||||
// Neighbor event from jls_neighbor_provider.
|
||||
input var logic pixel_valid,
|
||||
output logic pixel_ready,
|
||||
input var logic [PIX_WIDTH-1:0] pixel_sample,
|
||||
input var logic [12:0] pixel_x,
|
||||
input var logic [12:0] pixel_y,
|
||||
input var logic pixel_strip_first_pixel,
|
||||
input var logic pixel_strip_last_pixel,
|
||||
input var logic [PIX_WIDTH-1:0] Ra,
|
||||
input var logic [PIX_WIDTH-1:0] Rb,
|
||||
input var logic [PIX_WIDTH-1:0] Rc,
|
||||
input var logic [PIX_WIDTH-1:0] Rd,
|
||||
|
||||
// Active strip width and NEAR for mode/run decisions.
|
||||
input var logic [12:0] strip_width,
|
||||
input var logic [5:0] NEAR,
|
||||
|
||||
// Regular-mode event forwarded to jls_predictor.
|
||||
output logic regular_valid,
|
||||
input var logic regular_ready,
|
||||
output logic [PIX_WIDTH-1:0] regular_sample,
|
||||
output logic [12:0] regular_x,
|
||||
output logic [12:0] regular_y,
|
||||
output logic regular_strip_first_pixel,
|
||||
output logic regular_strip_last_pixel,
|
||||
output logic [PIX_WIDTH-1:0] regular_Ra,
|
||||
output logic [PIX_WIDTH-1:0] regular_Rb,
|
||||
output logic [PIX_WIDTH-1:0] regular_Rc,
|
||||
output logic [PIX_WIDTH-1:0] regular_Rd,
|
||||
|
||||
// Run segment event for jls_run_mode.
|
||||
output logic run_segment_valid,
|
||||
input var logic run_segment_ready,
|
||||
output logic [12:0] run_length,
|
||||
output logic run_end_of_line,
|
||||
output logic run_interruption_valid,
|
||||
output logic [PIX_WIDTH-1:0] run_interruption_sample,
|
||||
output logic [12:0] run_interruption_x,
|
||||
output logic [12:0] run_interruption_y,
|
||||
output logic run_interruption_strip_first_pixel,
|
||||
output logic run_interruption_strip_last_pixel,
|
||||
output logic [PIX_WIDTH-1:0] run_Ra,
|
||||
output logic [PIX_WIDTH-1:0] run_Rb,
|
||||
|
||||
// jls_run_mode segment completion. The router does not accept the next pixel
|
||||
// until the segment's entropy events and optional interruption reconstruction
|
||||
// have completed.
|
||||
input var logic run_segment_done,
|
||||
|
||||
// Direct reconstructed run pixel. Run-interruption reconstruction comes from
|
||||
// jls_run_mode and is muxed at top level with this port and the regular path.
|
||||
output logic run_recon_valid,
|
||||
input var logic run_recon_ready,
|
||||
output logic [PIX_WIDTH-1:0] run_recon_sample,
|
||||
output logic [12:0] run_recon_x,
|
||||
output logic [12:0] run_recon_y
|
||||
);
|
||||
|
||||
typedef enum logic [1:0] {
|
||||
STATE_IDLE = 2'd0,
|
||||
STATE_WAIT_SEG = 2'd1
|
||||
} router_state_e;
|
||||
|
||||
typedef enum logic [1:0] {
|
||||
EVENT_REGULAR = 2'd0,
|
||||
EVENT_RUN_PIXEL = 2'd1,
|
||||
EVENT_RUN_EOF_LINE = 2'd2,
|
||||
EVENT_INTERRUPT = 2'd3
|
||||
} event_kind_e;
|
||||
|
||||
router_state_e state;
|
||||
event_kind_e event_kind;
|
||||
event_kind_e event_kind_next;
|
||||
|
||||
// Accumulated run length since the current run-mode segment started.
|
||||
logic [12:0] run_length_accum;
|
||||
|
||||
// Two-entry elastic input stage. This breaks the long combinational path
|
||||
// from neighbor history values through Annex A.3/A.7 decisions into the
|
||||
// downstream entropy ready chain. The second slot lets pixel_ready depend
|
||||
// only on local queue fullness instead of same-cycle downstream event_accept.
|
||||
logic slot_valid;
|
||||
logic [PIX_WIDTH-1:0] slot_sample;
|
||||
logic [12:0] slot_x;
|
||||
logic [12:0] slot_y;
|
||||
logic slot_strip_first_pixel;
|
||||
logic slot_strip_last_pixel;
|
||||
logic [PIX_WIDTH-1:0] slot_Ra;
|
||||
logic [PIX_WIDTH-1:0] slot_Rb;
|
||||
logic [PIX_WIDTH-1:0] slot_Rc;
|
||||
logic [PIX_WIDTH-1:0] slot_Rd;
|
||||
logic signed [32:0] slot_D1;
|
||||
logic signed [32:0] slot_D2;
|
||||
logic signed [32:0] slot_D3;
|
||||
logic signed [32:0] slot_sample_minus_Ra;
|
||||
logic slot_pixel_is_eol;
|
||||
logic next_slot_valid;
|
||||
logic [PIX_WIDTH-1:0] next_slot_sample;
|
||||
logic [12:0] next_slot_x;
|
||||
logic [12:0] next_slot_y;
|
||||
logic next_slot_strip_first_pixel;
|
||||
logic next_slot_strip_last_pixel;
|
||||
logic [PIX_WIDTH-1:0] next_slot_Ra;
|
||||
logic [PIX_WIDTH-1:0] next_slot_Rb;
|
||||
logic [PIX_WIDTH-1:0] next_slot_Rc;
|
||||
logic [PIX_WIDTH-1:0] next_slot_Rd;
|
||||
logic signed [32:0] next_slot_D1;
|
||||
logic signed [32:0] next_slot_D2;
|
||||
logic signed [32:0] next_slot_D3;
|
||||
logic signed [32:0] next_slot_sample_minus_Ra;
|
||||
logic next_slot_pixel_is_eol;
|
||||
|
||||
// One-entry classified slot. Annex A.3/A.7 gradient and run decisions are
|
||||
// registered here before the event queue sees them; this avoids driving the
|
||||
// event register enables directly from Ra/Rb/Rc/Rd comparison logic.
|
||||
logic class_valid;
|
||||
logic class_slot_open;
|
||||
logic class_can_enqueue;
|
||||
logic class_to_event;
|
||||
logic [PIX_WIDTH-1:0] class_sample;
|
||||
logic [12:0] class_x;
|
||||
logic [12:0] class_y;
|
||||
logic class_strip_first_pixel;
|
||||
logic class_strip_last_pixel;
|
||||
logic [PIX_WIDTH-1:0] class_Ra;
|
||||
logic [PIX_WIDTH-1:0] class_Rb;
|
||||
logic [PIX_WIDTH-1:0] class_Rc;
|
||||
logic [PIX_WIDTH-1:0] class_Rd;
|
||||
event_kind_e class_kind;
|
||||
logic [12:0] class_run_length;
|
||||
logic class_run_end_of_line;
|
||||
logic class_run_interruption_valid;
|
||||
|
||||
// Registered classified event. This is the timing boundary between Annex
|
||||
// A.3/A.7 mode decision and the downstream regular/run entropy pipelines.
|
||||
logic event_valid;
|
||||
logic [PIX_WIDTH-1:0] event_sample;
|
||||
logic [12:0] event_x;
|
||||
logic [12:0] event_y;
|
||||
logic event_strip_first_pixel;
|
||||
logic event_strip_last_pixel;
|
||||
logic [PIX_WIDTH-1:0] event_Ra;
|
||||
logic [PIX_WIDTH-1:0] event_Rb;
|
||||
logic [PIX_WIDTH-1:0] event_Rc;
|
||||
logic [PIX_WIDTH-1:0] event_Rd;
|
||||
logic [12:0] event_run_length;
|
||||
logic event_run_end_of_line;
|
||||
logic event_run_interruption_valid;
|
||||
logic [12:0] event_run_length_next;
|
||||
logic event_run_end_of_line_next;
|
||||
logic event_run_interruption_valid_next;
|
||||
// Second event slot. A filled second slot lets mode classification advance
|
||||
// without using the downstream ready chain in the same cycle.
|
||||
logic event_next_valid;
|
||||
logic [PIX_WIDTH-1:0] event_next_sample;
|
||||
logic [12:0] event_next_x;
|
||||
logic [12:0] event_next_y;
|
||||
logic event_next_strip_first_pixel;
|
||||
logic event_next_strip_last_pixel;
|
||||
logic [PIX_WIDTH-1:0] event_next_Ra;
|
||||
logic [PIX_WIDTH-1:0] event_next_Rb;
|
||||
logic [PIX_WIDTH-1:0] event_next_Rc;
|
||||
logic [PIX_WIDTH-1:0] event_next_Rd;
|
||||
event_kind_e event_next_kind;
|
||||
logic [12:0] event_next_run_length;
|
||||
logic event_next_run_end_of_line;
|
||||
logic event_next_run_interruption_valid;
|
||||
|
||||
// Current input-pixel arithmetic. Only the first subtract layer is computed
|
||||
// before the slot register; the absolute-value / compare tree for Annex
|
||||
// A.3/A.7 runs one cycle later from slot_D1/slot_D2/slot_D3.
|
||||
logic signed [32:0] pixel_Ra_ext;
|
||||
logic signed [32:0] pixel_Rb_ext;
|
||||
logic signed [32:0] pixel_Rc_ext;
|
||||
logic signed [32:0] pixel_Rd_ext;
|
||||
logic signed [32:0] pixel_sample_ext;
|
||||
logic signed [32:0] near_ext33;
|
||||
logic signed [32:0] pixel_D1;
|
||||
logic signed [32:0] pixel_D2;
|
||||
logic signed [32:0] pixel_D3;
|
||||
logic signed [32:0] pixel_sample_minus_Ra;
|
||||
logic signed [32:0] slot_abs_D1;
|
||||
logic signed [32:0] slot_abs_D2;
|
||||
logic signed [32:0] slot_abs_D3;
|
||||
logic signed [32:0] slot_abs_sample_minus_Ra;
|
||||
logic slot_run_context_eval;
|
||||
logic slot_sample_matches_Ra_eval;
|
||||
logic pixel_is_eol_next;
|
||||
logic run_mode_active;
|
||||
logic slot_to_class;
|
||||
logic event_accept;
|
||||
logic slot_accept;
|
||||
logic load_pixel;
|
||||
logic input_queue_full;
|
||||
logic run_pixel_accept;
|
||||
logic interruption_accept;
|
||||
logic regular_accept;
|
||||
logic run_eol_segment_accept;
|
||||
logic [12:0] run_length_with_current;
|
||||
logic event_queue_full;
|
||||
logic promote_next_event;
|
||||
logic load_event_front_from_class;
|
||||
logic load_event_next_from_class;
|
||||
|
||||
// Padding for supported PIX_WIDTH values into 33-bit signed arithmetic.
|
||||
localparam int SAMPLE_EXT_PAD_WIDTH = 33 - PIX_WIDTH;
|
||||
|
||||
always_comb begin
|
||||
pixel_Ra_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Ra});
|
||||
pixel_Rb_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Rb});
|
||||
pixel_Rc_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Rc});
|
||||
pixel_Rd_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, Rd});
|
||||
pixel_sample_ext = $signed({{SAMPLE_EXT_PAD_WIDTH{1'b0}}, pixel_sample});
|
||||
near_ext33 = $signed({27'd0, NEAR});
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
pixel_D1 = pixel_Rd_ext - pixel_Rb_ext;
|
||||
pixel_D2 = pixel_Rb_ext - pixel_Rc_ext;
|
||||
pixel_D3 = pixel_Rc_ext - pixel_Ra_ext;
|
||||
pixel_sample_minus_Ra = pixel_sample_ext - pixel_Ra_ext;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_abs_D1 = slot_D1;
|
||||
if (slot_D1 < 33'sd0) begin
|
||||
slot_abs_D1 = -slot_D1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_abs_D2 = slot_D2;
|
||||
if (slot_D2 < 33'sd0) begin
|
||||
slot_abs_D2 = -slot_D2;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_abs_D3 = slot_D3;
|
||||
if (slot_D3 < 33'sd0) begin
|
||||
slot_abs_D3 = -slot_D3;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_abs_sample_minus_Ra = slot_sample_minus_Ra;
|
||||
if (slot_sample_minus_Ra < 33'sd0) begin
|
||||
slot_abs_sample_minus_Ra = -slot_sample_minus_Ra;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_run_context_eval = 1'b0;
|
||||
if (slot_abs_D1 <= near_ext33 &&
|
||||
slot_abs_D2 <= near_ext33 &&
|
||||
slot_abs_D3 <= near_ext33) begin
|
||||
slot_run_context_eval = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_sample_matches_Ra_eval = 1'b0;
|
||||
if (slot_abs_sample_minus_Ra <= near_ext33) begin
|
||||
slot_sample_matches_Ra_eval = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
pixel_is_eol_next = 1'b0;
|
||||
if (pixel_x == (strip_width - 13'd1)) begin
|
||||
pixel_is_eol_next = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_mode_active = slot_run_context_eval;
|
||||
if (run_length_accum != 13'd0) begin
|
||||
run_mode_active = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_length_with_current = run_length_accum + 13'd1;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
regular_valid = 1'b0;
|
||||
if (event_valid && event_kind == EVENT_REGULAR) begin
|
||||
regular_valid = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_recon_valid = 1'b0;
|
||||
if (event_valid &&
|
||||
(event_kind == EVENT_RUN_PIXEL || event_kind == EVENT_RUN_EOF_LINE)) begin
|
||||
run_recon_valid = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_segment_valid = 1'b0;
|
||||
if (event_valid &&
|
||||
(event_kind == EVENT_RUN_EOF_LINE || event_kind == EVENT_INTERRUPT)) begin
|
||||
run_segment_valid = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
regular_accept = 1'b0;
|
||||
if (regular_valid && regular_ready) begin
|
||||
regular_accept = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_pixel_accept = 1'b0;
|
||||
if (run_recon_valid && run_recon_ready && event_kind == EVENT_RUN_PIXEL) begin
|
||||
run_pixel_accept = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_eol_segment_accept = 1'b0;
|
||||
if (run_recon_valid && run_recon_ready && run_segment_valid && run_segment_ready &&
|
||||
event_kind == EVENT_RUN_EOF_LINE) begin
|
||||
run_eol_segment_accept = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
interruption_accept = 1'b0;
|
||||
if (run_segment_valid && run_segment_ready && event_kind == EVENT_INTERRUPT) begin
|
||||
interruption_accept = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
event_accept = 1'b0;
|
||||
case (1'b1)
|
||||
(regular_accept): begin
|
||||
event_accept = 1'b1;
|
||||
end
|
||||
|
||||
(run_pixel_accept): begin
|
||||
event_accept = 1'b1;
|
||||
end
|
||||
|
||||
(run_eol_segment_accept): begin
|
||||
event_accept = 1'b1;
|
||||
end
|
||||
|
||||
(interruption_accept): begin
|
||||
event_accept = 1'b1;
|
||||
end
|
||||
|
||||
default: begin
|
||||
event_accept = 1'b0;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_accept = slot_to_class;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
event_kind_next = EVENT_REGULAR;
|
||||
if (run_mode_active) begin
|
||||
if (slot_sample_matches_Ra_eval && slot_pixel_is_eol) begin
|
||||
event_kind_next = EVENT_RUN_EOF_LINE;
|
||||
end else if (slot_sample_matches_Ra_eval) begin
|
||||
event_kind_next = EVENT_RUN_PIXEL;
|
||||
end else begin
|
||||
event_kind_next = EVENT_INTERRUPT;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
event_run_length_next = run_length_accum;
|
||||
event_run_end_of_line_next = 1'b0;
|
||||
event_run_interruption_valid_next = 1'b1;
|
||||
if (event_kind_next == EVENT_RUN_EOF_LINE) begin
|
||||
event_run_length_next = run_length_with_current;
|
||||
event_run_end_of_line_next = 1'b1;
|
||||
event_run_interruption_valid_next = 1'b0;
|
||||
end else if (event_kind_next == EVENT_RUN_PIXEL) begin
|
||||
event_run_length_next = run_length_with_current;
|
||||
event_run_end_of_line_next = 1'b0;
|
||||
event_run_interruption_valid_next = 1'b0;
|
||||
end else if (event_kind_next == EVENT_REGULAR) begin
|
||||
event_run_interruption_valid_next = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
input_queue_full = 1'b0;
|
||||
if (slot_valid && next_slot_valid) begin
|
||||
input_queue_full = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
event_queue_full = 1'b0;
|
||||
if (event_valid && event_next_valid) begin
|
||||
event_queue_full = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
class_slot_open = 1'b0;
|
||||
if (!class_valid || (class_to_event && class_kind == EVENT_REGULAR)) begin
|
||||
class_slot_open = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
slot_to_class = 1'b0;
|
||||
if (slot_valid && class_slot_open) begin
|
||||
slot_to_class = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
class_can_enqueue = 1'b0;
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
class_can_enqueue = 1'b1;
|
||||
end
|
||||
|
||||
STATE_WAIT_SEG: begin
|
||||
if (class_kind == EVENT_RUN_PIXEL) begin
|
||||
class_can_enqueue = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
default: begin
|
||||
class_can_enqueue = 1'b0;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
class_to_event = 1'b0;
|
||||
if (class_valid && class_can_enqueue && !event_queue_full) begin
|
||||
class_to_event = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
promote_next_event = 1'b0;
|
||||
if (!event_valid && event_next_valid) begin
|
||||
promote_next_event = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
load_event_front_from_class = 1'b0;
|
||||
if (class_to_event && !event_valid && !event_next_valid) begin
|
||||
load_event_front_from_class = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
load_event_next_from_class = 1'b0;
|
||||
if (class_to_event && (event_valid || event_next_valid)) begin
|
||||
load_event_next_from_class = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
pixel_ready = 1'b0;
|
||||
if (!input_queue_full) begin
|
||||
pixel_ready = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
load_pixel = 1'b0;
|
||||
if (pixel_valid && pixel_ready) begin
|
||||
load_pixel = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
regular_sample = event_sample;
|
||||
regular_x = event_x;
|
||||
regular_y = event_y;
|
||||
regular_strip_first_pixel = event_strip_first_pixel;
|
||||
regular_strip_last_pixel = event_strip_last_pixel;
|
||||
regular_Ra = event_Ra;
|
||||
regular_Rb = event_Rb;
|
||||
regular_Rc = event_Rc;
|
||||
regular_Rd = event_Rd;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_length = event_run_length;
|
||||
run_end_of_line = event_run_end_of_line;
|
||||
run_interruption_valid = event_run_interruption_valid;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_interruption_sample = event_sample;
|
||||
run_interruption_x = event_x;
|
||||
run_interruption_y = event_y;
|
||||
run_interruption_strip_first_pixel = event_strip_first_pixel;
|
||||
run_interruption_strip_last_pixel = event_strip_last_pixel;
|
||||
run_Ra = event_Ra;
|
||||
run_Rb = event_Rb;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
run_recon_sample = event_Ra;
|
||||
run_recon_x = event_x;
|
||||
run_recon_y = event_y;
|
||||
end
|
||||
|
||||
/*
|
||||
* The event register above owns all external regular/run outputs. The older
|
||||
* direct slot-to-output combinational blocks are intentionally absent; this
|
||||
* keeps slot_Ra/Rb/Rc/Rd comparison logic from feeding top-level entropy
|
||||
* counters or run-mode DSP inputs in the same cycle.
|
||||
*/
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst) begin
|
||||
state <= STATE_IDLE;
|
||||
run_length_accum <= 13'd0;
|
||||
slot_valid <= 1'b0;
|
||||
slot_sample <= {PIX_WIDTH{1'b0}};
|
||||
slot_x <= 13'd0;
|
||||
slot_y <= 13'd0;
|
||||
slot_strip_first_pixel <= 1'b0;
|
||||
slot_strip_last_pixel <= 1'b0;
|
||||
slot_Ra <= {PIX_WIDTH{1'b0}};
|
||||
slot_Rb <= {PIX_WIDTH{1'b0}};
|
||||
slot_Rc <= {PIX_WIDTH{1'b0}};
|
||||
slot_Rd <= {PIX_WIDTH{1'b0}};
|
||||
slot_D1 <= 33'sd0;
|
||||
slot_D2 <= 33'sd0;
|
||||
slot_D3 <= 33'sd0;
|
||||
slot_sample_minus_Ra <= 33'sd0;
|
||||
slot_pixel_is_eol <= 1'b0;
|
||||
next_slot_valid <= 1'b0;
|
||||
next_slot_sample <= {PIX_WIDTH{1'b0}};
|
||||
next_slot_x <= 13'd0;
|
||||
next_slot_y <= 13'd0;
|
||||
next_slot_strip_first_pixel <= 1'b0;
|
||||
next_slot_strip_last_pixel <= 1'b0;
|
||||
next_slot_Ra <= {PIX_WIDTH{1'b0}};
|
||||
next_slot_Rb <= {PIX_WIDTH{1'b0}};
|
||||
next_slot_Rc <= {PIX_WIDTH{1'b0}};
|
||||
next_slot_Rd <= {PIX_WIDTH{1'b0}};
|
||||
next_slot_D1 <= 33'sd0;
|
||||
next_slot_D2 <= 33'sd0;
|
||||
next_slot_D3 <= 33'sd0;
|
||||
next_slot_sample_minus_Ra <= 33'sd0;
|
||||
next_slot_pixel_is_eol <= 1'b0;
|
||||
class_valid <= 1'b0;
|
||||
class_sample <= {PIX_WIDTH{1'b0}};
|
||||
class_x <= 13'd0;
|
||||
class_y <= 13'd0;
|
||||
class_strip_first_pixel <= 1'b0;
|
||||
class_strip_last_pixel <= 1'b0;
|
||||
class_Ra <= {PIX_WIDTH{1'b0}};
|
||||
class_Rb <= {PIX_WIDTH{1'b0}};
|
||||
class_Rc <= {PIX_WIDTH{1'b0}};
|
||||
class_Rd <= {PIX_WIDTH{1'b0}};
|
||||
class_kind <= EVENT_REGULAR;
|
||||
class_run_length <= 13'd0;
|
||||
class_run_end_of_line <= 1'b0;
|
||||
class_run_interruption_valid <= 1'b0;
|
||||
event_kind <= EVENT_REGULAR;
|
||||
event_valid <= 1'b0;
|
||||
event_sample <= {PIX_WIDTH{1'b0}};
|
||||
event_x <= 13'd0;
|
||||
event_y <= 13'd0;
|
||||
event_strip_first_pixel <= 1'b0;
|
||||
event_strip_last_pixel <= 1'b0;
|
||||
event_Ra <= {PIX_WIDTH{1'b0}};
|
||||
event_Rb <= {PIX_WIDTH{1'b0}};
|
||||
event_Rc <= {PIX_WIDTH{1'b0}};
|
||||
event_Rd <= {PIX_WIDTH{1'b0}};
|
||||
event_run_length <= 13'd0;
|
||||
event_run_end_of_line <= 1'b0;
|
||||
event_run_interruption_valid <= 1'b0;
|
||||
event_next_valid <= 1'b0;
|
||||
event_next_sample <= {PIX_WIDTH{1'b0}};
|
||||
event_next_x <= 13'd0;
|
||||
event_next_y <= 13'd0;
|
||||
event_next_strip_first_pixel <= 1'b0;
|
||||
event_next_strip_last_pixel <= 1'b0;
|
||||
event_next_Ra <= {PIX_WIDTH{1'b0}};
|
||||
event_next_Rb <= {PIX_WIDTH{1'b0}};
|
||||
event_next_Rc <= {PIX_WIDTH{1'b0}};
|
||||
event_next_Rd <= {PIX_WIDTH{1'b0}};
|
||||
event_next_kind <= EVENT_REGULAR;
|
||||
event_next_run_length <= 13'd0;
|
||||
event_next_run_end_of_line <= 1'b0;
|
||||
event_next_run_interruption_valid <= 1'b0;
|
||||
end else begin
|
||||
if (state == STATE_WAIT_SEG && run_segment_done) begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
|
||||
if (event_accept) begin
|
||||
event_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (class_to_event) begin
|
||||
class_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (slot_to_class) begin
|
||||
class_valid <= 1'b1;
|
||||
class_sample <= slot_sample;
|
||||
class_x <= slot_x;
|
||||
class_y <= slot_y;
|
||||
class_strip_first_pixel <= slot_strip_first_pixel;
|
||||
class_strip_last_pixel <= slot_strip_last_pixel;
|
||||
class_Ra <= slot_Ra;
|
||||
class_Rb <= slot_Rb;
|
||||
class_Rc <= slot_Rc;
|
||||
class_Rd <= slot_Rd;
|
||||
class_kind <= event_kind_next;
|
||||
class_run_length <= event_run_length_next;
|
||||
class_run_end_of_line <= event_run_end_of_line_next;
|
||||
class_run_interruption_valid <= event_run_interruption_valid_next;
|
||||
end
|
||||
|
||||
if (promote_next_event) begin
|
||||
event_valid <= 1'b1;
|
||||
event_kind <= event_next_kind;
|
||||
event_sample <= event_next_sample;
|
||||
event_x <= event_next_x;
|
||||
event_y <= event_next_y;
|
||||
event_strip_first_pixel <= event_next_strip_first_pixel;
|
||||
event_strip_last_pixel <= event_next_strip_last_pixel;
|
||||
event_Ra <= event_next_Ra;
|
||||
event_Rb <= event_next_Rb;
|
||||
event_Rc <= event_next_Rc;
|
||||
event_Rd <= event_next_Rd;
|
||||
event_run_length <= event_next_run_length;
|
||||
event_run_end_of_line <= event_next_run_end_of_line;
|
||||
event_run_interruption_valid <= event_next_run_interruption_valid;
|
||||
event_next_valid <= 1'b0;
|
||||
end
|
||||
|
||||
if (load_event_front_from_class) begin
|
||||
event_valid <= 1'b1;
|
||||
event_kind <= class_kind;
|
||||
event_sample <= class_sample;
|
||||
event_x <= class_x;
|
||||
event_y <= class_y;
|
||||
event_strip_first_pixel <= class_strip_first_pixel;
|
||||
event_strip_last_pixel <= class_strip_last_pixel;
|
||||
event_Ra <= class_Ra;
|
||||
event_Rb <= class_Rb;
|
||||
event_Rc <= class_Rc;
|
||||
event_Rd <= class_Rd;
|
||||
event_run_length <= class_run_length;
|
||||
event_run_end_of_line <= class_run_end_of_line;
|
||||
event_run_interruption_valid <= class_run_interruption_valid;
|
||||
end
|
||||
|
||||
if (load_event_next_from_class) begin
|
||||
event_next_valid <= 1'b1;
|
||||
event_next_kind <= class_kind;
|
||||
event_next_sample <= class_sample;
|
||||
event_next_x <= class_x;
|
||||
event_next_y <= class_y;
|
||||
event_next_strip_first_pixel <= class_strip_first_pixel;
|
||||
event_next_strip_last_pixel <= class_strip_last_pixel;
|
||||
event_next_Ra <= class_Ra;
|
||||
event_next_Rb <= class_Rb;
|
||||
event_next_Rc <= class_Rc;
|
||||
event_next_Rd <= class_Rd;
|
||||
event_next_run_length <= class_run_length;
|
||||
event_next_run_end_of_line <= class_run_end_of_line;
|
||||
event_next_run_interruption_valid <= class_run_interruption_valid;
|
||||
end
|
||||
|
||||
if (class_to_event) begin
|
||||
case (class_kind)
|
||||
EVENT_RUN_PIXEL: begin
|
||||
run_length_accum <= class_run_length;
|
||||
end
|
||||
|
||||
EVENT_RUN_EOF_LINE, EVENT_INTERRUPT: begin
|
||||
run_length_accum <= 13'd0;
|
||||
state <= STATE_WAIT_SEG;
|
||||
end
|
||||
|
||||
default: begin
|
||||
run_length_accum <= run_length_accum;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
case ({slot_to_class, slot_valid, next_slot_valid, load_pixel})
|
||||
4'b1100: begin
|
||||
slot_valid <= 1'b0;
|
||||
end
|
||||
|
||||
4'b1101: begin
|
||||
slot_valid <= 1'b1;
|
||||
slot_sample <= pixel_sample;
|
||||
slot_x <= pixel_x;
|
||||
slot_y <= pixel_y;
|
||||
slot_strip_first_pixel <= pixel_strip_first_pixel;
|
||||
slot_strip_last_pixel <= pixel_strip_last_pixel;
|
||||
slot_Ra <= Ra;
|
||||
slot_Rb <= Rb;
|
||||
slot_Rc <= Rc;
|
||||
slot_Rd <= Rd;
|
||||
slot_D1 <= pixel_D1;
|
||||
slot_D2 <= pixel_D2;
|
||||
slot_D3 <= pixel_D3;
|
||||
slot_sample_minus_Ra <= pixel_sample_minus_Ra;
|
||||
slot_pixel_is_eol <= pixel_is_eol_next;
|
||||
end
|
||||
|
||||
4'b1110: begin
|
||||
slot_valid <= 1'b1;
|
||||
slot_sample <= next_slot_sample;
|
||||
slot_x <= next_slot_x;
|
||||
slot_y <= next_slot_y;
|
||||
slot_strip_first_pixel <= next_slot_strip_first_pixel;
|
||||
slot_strip_last_pixel <= next_slot_strip_last_pixel;
|
||||
slot_Ra <= next_slot_Ra;
|
||||
slot_Rb <= next_slot_Rb;
|
||||
slot_Rc <= next_slot_Rc;
|
||||
slot_Rd <= next_slot_Rd;
|
||||
slot_D1 <= next_slot_D1;
|
||||
slot_D2 <= next_slot_D2;
|
||||
slot_D3 <= next_slot_D3;
|
||||
slot_sample_minus_Ra <= next_slot_sample_minus_Ra;
|
||||
slot_pixel_is_eol <= next_slot_pixel_is_eol;
|
||||
next_slot_valid <= 1'b0;
|
||||
end
|
||||
|
||||
4'b1111: begin
|
||||
slot_valid <= 1'b1;
|
||||
slot_sample <= next_slot_sample;
|
||||
slot_x <= next_slot_x;
|
||||
slot_y <= next_slot_y;
|
||||
slot_strip_first_pixel <= next_slot_strip_first_pixel;
|
||||
slot_strip_last_pixel <= next_slot_strip_last_pixel;
|
||||
slot_Ra <= next_slot_Ra;
|
||||
slot_Rb <= next_slot_Rb;
|
||||
slot_Rc <= next_slot_Rc;
|
||||
slot_Rd <= next_slot_Rd;
|
||||
slot_D1 <= next_slot_D1;
|
||||
slot_D2 <= next_slot_D2;
|
||||
slot_D3 <= next_slot_D3;
|
||||
slot_sample_minus_Ra <= next_slot_sample_minus_Ra;
|
||||
slot_pixel_is_eol <= next_slot_pixel_is_eol;
|
||||
next_slot_valid <= 1'b1;
|
||||
next_slot_sample <= pixel_sample;
|
||||
next_slot_x <= pixel_x;
|
||||
next_slot_y <= pixel_y;
|
||||
next_slot_strip_first_pixel <= pixel_strip_first_pixel;
|
||||
next_slot_strip_last_pixel <= pixel_strip_last_pixel;
|
||||
next_slot_Ra <= Ra;
|
||||
next_slot_Rb <= Rb;
|
||||
next_slot_Rc <= Rc;
|
||||
next_slot_Rd <= Rd;
|
||||
next_slot_D1 <= pixel_D1;
|
||||
next_slot_D2 <= pixel_D2;
|
||||
next_slot_D3 <= pixel_D3;
|
||||
next_slot_sample_minus_Ra <= pixel_sample_minus_Ra;
|
||||
next_slot_pixel_is_eol <= pixel_is_eol_next;
|
||||
end
|
||||
|
||||
4'b0001: begin
|
||||
slot_valid <= 1'b1;
|
||||
slot_sample <= pixel_sample;
|
||||
slot_x <= pixel_x;
|
||||
slot_y <= pixel_y;
|
||||
slot_strip_first_pixel <= pixel_strip_first_pixel;
|
||||
slot_strip_last_pixel <= pixel_strip_last_pixel;
|
||||
slot_Ra <= Ra;
|
||||
slot_Rb <= Rb;
|
||||
slot_Rc <= Rc;
|
||||
slot_Rd <= Rd;
|
||||
slot_D1 <= pixel_D1;
|
||||
slot_D2 <= pixel_D2;
|
||||
slot_D3 <= pixel_D3;
|
||||
slot_sample_minus_Ra <= pixel_sample_minus_Ra;
|
||||
slot_pixel_is_eol <= pixel_is_eol_next;
|
||||
end
|
||||
|
||||
4'b0101, 4'b0111: begin
|
||||
next_slot_valid <= 1'b1;
|
||||
next_slot_sample <= pixel_sample;
|
||||
next_slot_x <= pixel_x;
|
||||
next_slot_y <= pixel_y;
|
||||
next_slot_strip_first_pixel <= pixel_strip_first_pixel;
|
||||
next_slot_strip_last_pixel <= pixel_strip_last_pixel;
|
||||
next_slot_Ra <= Ra;
|
||||
next_slot_Rb <= Rb;
|
||||
next_slot_Rc <= Rc;
|
||||
next_slot_Rd <= Rd;
|
||||
next_slot_D1 <= pixel_D1;
|
||||
next_slot_D2 <= pixel_D2;
|
||||
next_slot_D3 <= pixel_D3;
|
||||
next_slot_sample_minus_Ra <= pixel_sample_minus_Ra;
|
||||
next_slot_pixel_is_eol <= pixel_is_eol_next;
|
||||
end
|
||||
|
||||
default: begin
|
||||
slot_valid <= slot_valid;
|
||||
next_slot_valid <= next_slot_valid;
|
||||
end
|
||||
endcase
|
||||
|
||||
if (!slot_valid && next_slot_valid && !slot_to_class && !load_pixel) begin
|
||||
slot_valid <= 1'b1;
|
||||
slot_sample <= next_slot_sample;
|
||||
slot_x <= next_slot_x;
|
||||
slot_y <= next_slot_y;
|
||||
slot_strip_first_pixel <= next_slot_strip_first_pixel;
|
||||
slot_strip_last_pixel <= next_slot_strip_last_pixel;
|
||||
slot_Ra <= next_slot_Ra;
|
||||
slot_Rb <= next_slot_Rb;
|
||||
slot_Rc <= next_slot_Rc;
|
||||
slot_Rd <= next_slot_Rd;
|
||||
slot_D1 <= next_slot_D1;
|
||||
slot_D2 <= next_slot_D2;
|
||||
slot_D3 <= next_slot_D3;
|
||||
slot_sample_minus_Ra <= next_slot_sample_minus_Ra;
|
||||
slot_pixel_is_eol <= next_slot_pixel_is_eol;
|
||||
next_slot_valid <= 1'b0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
`default_nettype wire
|
||||
264
fpga/verilog/jls_near_ctrl.sv
Normal file
264
fpga/verilog/jls_near_ctrl.sv
Normal file
@@ -0,0 +1,264 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.2.3 scan header NEAR parameter; Annex A uses NEAR in coding
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Project dynamic NEAR control around the standard NEAR parameter
|
||||
// Trace : docs/jls_traceability.md#dynamic-near-control
|
||||
// Example : For image_ratio port value 2 (1:4 compression), target bits are source bits divided by 4.
|
||||
//
|
||||
// Dynamic NEAR controller. This project-specific controller keeps NEAR at 0
|
||||
// for lossless/invalid ratios and applies a simple cumulative actual-vs-target
|
||||
// step after each standalone strip frame is fully output.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_near_ctrl #(
  // Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16,

  // Maximum dynamic NEAR allowed by the first RTL version.
  parameter int MAX_NEAR = 31
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous active-high reset.
  input var logic rst,

  // First strip of a new original image; resets dynamic NEAR to 0.
  input var logic image_start_valid,

  // Runtime ratio sampled for the new original image.
  input var logic [3:0] image_ratio,

  // Current strip frame is completely output, including header, payload, and EOI.
  input var logic strip_done_valid,

  // Number of original-image pixels in the completed strip frame.
  input var logic [31:0] strip_pixel_count,

  // Number of output bytes generated by the completed strip frame.
  input var logic [31:0] strip_output_bytes,

  // NEAR value to use for the next strip frame header and coding pipeline.
  output logic [5:0] current_near,

  // Cumulative actual output bits for verification and reporting.
  output logic [47:0] actual_bits_cumulative,

  // Cumulative target bits for verification and reporting.
  output logic [47:0] target_bits_cumulative,

  // Sticky report flag: target still missed while NEAR was already at MAX_NEAR.
  output logic target_miss_at_max_near,

  // One-cycle delayed update is active. The top level holds the next strip
  // start while this is high so the scan header observes the updated NEAR.
  output logic update_busy
);

  // ---------------------------------------------------------------------
  // Constants
  // ---------------------------------------------------------------------

  // Ratio port encodings (SRS ratio port definition). Any other value is
  // handled as lossless/invalid and pins NEAR to 0.
  localparam logic [3:0] RATIO_LOSSLESS = 4'd0;
  localparam logic [3:0] RATIO_1_TO_2   = 4'd1;
  localparam logic [3:0] RATIO_1_TO_4   = 4'd2;
  localparam logic [3:0] RATIO_1_TO_8   = 4'd3;

  // Low six bits of MAX_NEAR, used as the NEAR ceiling.
  localparam logic [5:0] MAX_NEAR_VALUE = MAX_NEAR[5:0];

  // ---------------------------------------------------------------------
  // State registers
  // ---------------------------------------------------------------------

  // Ratio captured at image start and held for the whole image.
  logic [3:0]  active_ratio;

  // Stage-1 capture of a strip completion: the new cumulative sums are
  // registered here so the 48-bit adders and the compare/step logic sit in
  // different clock cycles.
  logic        pending_update_valid;
  logic [47:0] pending_actual_bits_sum;
  logic [47:0] pending_target_bits_sum;
  logic        pending_ratio_is_lossless_or_invalid;

  // ---------------------------------------------------------------------
  // Combinational helpers
  // ---------------------------------------------------------------------

  // Per-strip bit counts.
  logic [47:0] pixel_count_wide;
  logic [47:0] source_bits;
  logic [47:0] target_bits;
  logic [47:0] output_bits;

  // Running totals including the strip that just completed.
  logic [47:0] actual_total_next;
  logic [47:0] target_total_next;

  // Classification of the latched ratio.
  logic        ratio_is_lossless_or_invalid;

  // Stage-2 decisions derived from the pending (registered) totals.
  logic        over_budget;
  logic        under_budget;
  logic        near_at_ceiling;
  logic        step_up;
  logic        step_down;
  logic [5:0]  near_after_update;

  // Busy flag mirrors the pending stage-1 register.
  assign update_busy = pending_update_valid;

  // Zero-extend the pixel count so the scaling below cannot overflow.
  assign pixel_count_wide = {16'd0, strip_pixel_count};

  // Output bytes -> bits (x8) in the 48-bit accounting width.
  assign output_bits = {16'd0, strip_output_bytes} << 3;

  // Source bits = pixel count * PIX_WIDTH, built from constant shifts and
  // adds. Unlisted PIX_WIDTH values fall back to the 16-bit scaling.
  always_comb begin
    case (PIX_WIDTH)
      8:       source_bits = pixel_count_wide << 3;
      10:      source_bits = (pixel_count_wide << 3) + (pixel_count_wide << 1);
      12:      source_bits = (pixel_count_wide << 3) + (pixel_count_wide << 2);
      14:      source_bits = (pixel_count_wide << 3) + (pixel_count_wide << 2) +
                             (pixel_count_wide << 1);
      default: source_bits = pixel_count_wide << 4;
    endcase
  end

  // Decode the latched ratio once: it selects both the per-strip target
  // (source bits divided by 2, 4 or 8) and the lossless/invalid flag.
  always_comb begin
    target_bits                  = source_bits;
    ratio_is_lossless_or_invalid = 1'b1;

    case (active_ratio)
      RATIO_1_TO_2: begin
        target_bits                  = source_bits >> 1;
        ratio_is_lossless_or_invalid = 1'b0;
      end

      RATIO_1_TO_4: begin
        target_bits                  = source_bits >> 2;
        ratio_is_lossless_or_invalid = 1'b0;
      end

      RATIO_1_TO_8: begin
        target_bits                  = source_bits >> 3;
        ratio_is_lossless_or_invalid = 1'b0;
      end

      default: begin
        // Lossless or unknown encoding: keep the defaults assigned above.
      end
    endcase
  end

  // Stage-1 adders: fold the completed strip into the running totals.
  assign actual_total_next = actual_bits_cumulative + output_bits;
  assign target_total_next = target_bits_cumulative + target_bits;

  // Stage-2 decisions. Every flag defaults low and is raised by an explicit
  // guard, evaluated in dependency order within this one block.
  always_comb begin
    over_budget     = 1'b0;
    under_budget    = 1'b0;
    near_at_ceiling = 1'b0;
    step_up         = 1'b0;
    step_down       = 1'b0;

    if (pending_actual_bits_sum > pending_target_bits_sum) begin
      over_budget = 1'b1;
    end

    if (pending_actual_bits_sum < pending_target_bits_sum) begin
      under_budget = 1'b1;
    end

    if (current_near >= MAX_NEAR_VALUE) begin
      near_at_ceiling = 1'b1;
    end

    // Raise NEAR only for a lossy ratio that is over budget and below the ceiling.
    if (!pending_ratio_is_lossless_or_invalid && over_budget && !near_at_ceiling) begin
      step_up = 1'b1;
    end

    // Lower NEAR only for a lossy ratio that is under budget and above zero.
    if (!pending_ratio_is_lossless_or_invalid && under_budget &&
        current_near != 6'd0) begin
      step_down = 1'b1;
    end
  end

  // NEAR value to commit when the pending update is applied. Priority:
  // lossless/invalid forces 0, then +1, then -1, otherwise hold.
  always_comb begin
    near_after_update = current_near;

    if (pending_ratio_is_lossless_or_invalid) begin
      near_after_update = 6'd0;
    end else if (step_up) begin
      near_after_update = current_near + 6'd1;
    end else if (step_down) begin
      near_after_update = current_near - 6'd1;
    end
  end

  // ---------------------------------------------------------------------
  // Sequential control. Priority: reset, image start, apply pending update,
  // capture a newly completed strip.
  // ---------------------------------------------------------------------
  always_ff @(posedge clk) begin
    if (rst) begin
      active_ratio                         <= RATIO_LOSSLESS;
      current_near                         <= 6'd0;
      actual_bits_cumulative               <= 48'd0;
      target_bits_cumulative               <= 48'd0;
      target_miss_at_max_near              <= 1'b0;
      pending_update_valid                 <= 1'b0;
      pending_actual_bits_sum              <= 48'd0;
      pending_target_bits_sum              <= 48'd0;
      pending_ratio_is_lossless_or_invalid <= 1'b1;
    end else if (image_start_valid) begin
      // New image: latch its ratio and clear all accounting and pending state.
      active_ratio                         <= image_ratio;
      current_near                         <= 6'd0;
      actual_bits_cumulative               <= 48'd0;
      target_bits_cumulative               <= 48'd0;
      target_miss_at_max_near              <= 1'b0;
      pending_update_valid                 <= 1'b0;
      pending_actual_bits_sum              <= 48'd0;
      pending_target_bits_sum              <= 48'd0;
      pending_ratio_is_lossless_or_invalid <= 1'b1;
    end else if (pending_update_valid) begin
      // Stage 2: commit the registered totals and step NEAR.
      actual_bits_cumulative <= pending_actual_bits_sum;
      target_bits_cumulative <= pending_target_bits_sum;
      current_near           <= near_after_update;

      // Sticky: still over target although NEAR is already at its ceiling.
      if (!pending_ratio_is_lossless_or_invalid && over_budget && near_at_ceiling) begin
        target_miss_at_max_near <= 1'b1;
      end

      pending_update_valid <= 1'b0;
    end else if (strip_done_valid) begin
      // Stage 1: register the new totals; they are applied on the next clock.
      pending_update_valid                 <= 1'b1;
      pending_actual_bits_sum              <= actual_total_next;
      pending_target_bits_sum              <= target_total_next;
      pending_ratio_is_lossless_or_invalid <= ratio_is_lossless_or_invalid;
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
79
fpga/verilog/jls_near_scale_mul.sv
Normal file
79
fpga/verilog/jls_near_scale_mul.sv
Normal file
@@ -0,0 +1,79 @@
|
||||
// Standard : Helper for JPEG-LS Annex A.5/A.6/A.7 odd-scale products
|
||||
// Clause : N/A helper used by multiple Annex arithmetic stages
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : product = multiplicand * (2 * NEAR + 1)
|
||||
// Trace : docs/jls_traceability.md#regular-error-quantization
|
||||
// Example : multiplicand=5 and near_scale=5 gives 25.
|
||||
//
|
||||
// The JPEG-LS NEAR scale is always an odd 6-bit positive value in the range
|
||||
// 1..63. Vivado tended to map these narrow-scale multiplies into cascaded
|
||||
// DSP48E1 structures, which put PCOUT->PCIN on the top timing path. This
|
||||
// helper keeps the operation in carry chains with a fixed three-adder shape:
|
||||
// one partial sum for bits [2:0], one partial sum for bits [5:3], then a final
|
||||
// add. The caller provides the surrounding pipeline registers.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_near_scale_mul #(
  parameter int INPUT_WIDTH = 33,
  parameter int OUTPUT_WIDTH = 41
) (
  input var logic signed [INPUT_WIDTH-1:0] multiplicand_i,
  input var logic [5:0] near_scale_i,
  output logic signed [OUTPUT_WIDTH-1:0] product_o
);

  // Number of sign bits needed to widen the multiplicand to OUTPUT_WIDTH.
  localparam int SIGN_PAD_WIDTH = OUTPUT_WIDTH - INPUT_WIDTH;

  // Sign-extended multiplicand and its left-shifted copies:
  // shifted[k] = multiplicand << k, one per bit of near_scale_i.
  logic signed [OUTPUT_WIDTH-1:0] operand_wide;
  logic signed [OUTPUT_WIDTH-1:0] shifted [0:5];

  // Partial products for the low and high three bits of near_scale_i.
  logic signed [OUTPUT_WIDTH-1:0] sum_low_bits;
  logic signed [OUTPUT_WIDTH-1:0] sum_high_bits;

  // Sum the terms enabled by a 3-bit selector: sel[0] enables term_a,
  // sel[1] enables term_b, sel[2] enables term_c. Written as an explicit
  // case so each selector value maps to a fixed adder expression.
  function automatic logic signed [OUTPUT_WIDTH-1:0] sum_selected(
    input logic [2:0] sel,
    input logic signed [OUTPUT_WIDTH-1:0] term_a,
    input logic signed [OUTPUT_WIDTH-1:0] term_b,
    input logic signed [OUTPUT_WIDTH-1:0] term_c
  );
    case (sel)
      3'b000:  return {OUTPUT_WIDTH{1'b0}};
      3'b001:  return term_a;
      3'b010:  return term_b;
      3'b011:  return term_a + term_b;
      3'b100:  return term_c;
      3'b101:  return term_a + term_c;
      3'b110:  return term_b + term_c;
      default: return term_a + term_b + term_c;
    endcase
  endfunction

  // Widen the multiplicand by replicating its sign bit, then build the six
  // shifted copies used as partial-product terms.
  always_comb begin
    operand_wide = {{SIGN_PAD_WIDTH{multiplicand_i[INPUT_WIDTH-1]}}, multiplicand_i};

    for (int k = 0; k < 6; k++) begin
      shifted[k] = operand_wide <<< k;
    end
  end

  // near_scale_i[2:0] contributes weights 1, 2 and 4.
  always_comb begin
    sum_low_bits = sum_selected(near_scale_i[2:0], shifted[0], shifted[1], shifted[2]);
  end

  // near_scale_i[5:3] contributes weights 8, 16 and 32.
  always_comb begin
    sum_high_bits = sum_selected(near_scale_i[5:3], shifted[3], shifted[4], shifted[5]);
  end

  // Final add of the two partial products.
  always_comb begin
    product_o = sum_low_bits + sum_high_bits;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
485
fpga/verilog/jls_neighbor_provider.sv
Normal file
485
fpga/verilog/jls_neighbor_provider.sv
Normal file
@@ -0,0 +1,485 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.3 context determination, Annex A.4 prediction
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Reconstructed neighborhood selection for Ra/Rb/Rc/Rd
|
||||
// Trace : docs/jls_traceability.md#med-predictor
|
||||
// Example : At x=0, Ra and Rb are the first sample from the previous line;
|
||||
// Rc is the previous line's left-edge extension sample, and Rd
|
||||
// is the next previous-line sample.
|
||||
//
|
||||
// Reconstructed-neighbor provider for one grayscale strip frame. JPEG-LS uses
|
||||
// encoder-side reconstructed samples as prediction history. For NEAR=0 the
|
||||
// reconstructed value is exactly the input sample, so this module commits the
|
||||
// sample to line history immediately and removes the feedback bubble. For
|
||||
// NEAR>0 it keeps one pixel outstanding until the true reconstructed sample
|
||||
// returns, preserving near-lossless standard state. The next pixel is accepted
|
||||
// after Rx is committed; this deliberate timing boundary keeps recon_x/recon_y
|
||||
// out of the upstream ready path at the 250 MHz target.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_neighbor_provider #(
|
||||
// Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
|
||||
parameter int PIX_WIDTH = 16,
|
||||
|
||||
// Maximum supported runtime image width.
|
||||
parameter int MAX_PIC_COL = 6144
|
||||
) (
|
||||
// Main 250 MHz clock.
|
||||
input var logic clk,
|
||||
|
||||
// Synchronous active-high reset.
|
||||
input var logic rst,
|
||||
|
||||
// Pixel event from jls_scan_ctrl is valid.
|
||||
input var logic pixel_valid,
|
||||
|
||||
// This provider can accept the current source pixel.
|
||||
output logic pixel_ready,
|
||||
|
||||
// Original input sample X.
|
||||
input var logic [PIX_WIDTH-1:0] pixel_sample,
|
||||
|
||||
// Original-image coordinate. A strip starts when strip_first_pixel is high.
|
||||
input var logic [12:0] pixel_x,
|
||||
input var logic [12:0] pixel_y,
|
||||
|
||||
// High on the last column of the current row. jls_scan_ctrl registers this
|
||||
// flag with pixel_x/y so the width comparison is not on the line-RAM read
|
||||
// path for Rd.
|
||||
input var logic pixel_row_last,
|
||||
|
||||
// Strip-local boundary flags.
|
||||
input var logic strip_first_pixel,
|
||||
input var logic strip_last_pixel,
|
||||
|
||||
// Active-strip fast-commit mode. High only when NEAR==0 for this strip, so
|
||||
// the provider can write X directly into line history without waiting for
|
||||
// the later reconstructed-sample return path.
|
||||
input var logic lossless_fast_mode,
|
||||
|
||||
// Pixel/neighborhood event is valid.
|
||||
output logic neigh_valid,
|
||||
|
||||
// Downstream predictor accepted the neighborhood event.
|
||||
input var logic neigh_ready,
|
||||
|
||||
// Forwarded pixel fields.
|
||||
output logic [PIX_WIDTH-1:0] neigh_sample,
|
||||
output logic [12:0] neigh_x,
|
||||
output logic [12:0] neigh_y,
|
||||
output logic neigh_strip_first_pixel,
|
||||
output logic neigh_strip_last_pixel,
|
||||
|
||||
// Standard reconstructed neighbors.
|
||||
output logic [PIX_WIDTH-1:0] Ra,
|
||||
output logic [PIX_WIDTH-1:0] Rb,
|
||||
output logic [PIX_WIDTH-1:0] Rc,
|
||||
output logic [PIX_WIDTH-1:0] Rd,
|
||||
|
||||
// Reconstructed sample writeback from the later error-quantizer/run stage.
|
||||
input var logic recon_valid,
|
||||
|
||||
// This provider is waiting for the current reconstructed sample.
|
||||
output logic recon_ready,
|
||||
|
||||
// Reconstructed sample Rx and its coordinate.
|
||||
input var logic [PIX_WIDTH-1:0] recon_sample,
|
||||
input var logic [12:0] recon_x,
|
||||
input var logic [12:0] recon_y
|
||||
);
|
||||
|
||||
// Two line banks implement previous/current reconstructed rows. The active
|
||||
// read bank is the previous row, while the other bank receives this row.
|
||||
logic [PIX_WIDTH-1:0] line_bank0 [0:MAX_PIC_COL-1];
|
||||
logic [PIX_WIDTH-1:0] line_bank1 [0:MAX_PIC_COL-1];
|
||||
|
||||
// Read/write bank selector. The write bank is the opposite of read_bank.
|
||||
logic read_bank;
|
||||
logic write_bank;
|
||||
|
||||
// High while the current row is the first row of a standalone strip frame.
|
||||
logic top_row_active;
|
||||
|
||||
// One outstanding pixel is held until its reconstructed sample returns when
|
||||
// NEAR>0. The NEAR=0 path does not use this bubble because Rx == X.
|
||||
logic waiting_reconstruct;
|
||||
logic [12:0] outstanding_x;
|
||||
logic [12:0] outstanding_y;
|
||||
logic outstanding_row_last;
|
||||
|
||||
// Left reconstructed neighbor for non-left-edge pixels in the current row.
|
||||
logic [PIX_WIDTH-1:0] left_Ra;
|
||||
logic [PIX_WIDTH-1:0] left_Ra_for_pixel;
|
||||
|
||||
// JPEG-LS left-edge extension state. CharLS models this with a width+2 line
|
||||
// buffer where previous_line[0] contains the first reconstructed sample from
|
||||
// the line before the previous line. For x=0 this value is Rc; it is zero on
|
||||
// the strip top row and on the row immediately after the strip top row.
|
||||
logic [PIX_WIDTH-1:0] left_edge_Rc;
|
||||
logic [PIX_WIDTH-1:0] row_left_Rb;
|
||||
|
||||
// Address and boundary decode for the source pixel.
|
||||
logic pixel_x_is_left_edge;
|
||||
logic pixel_x_is_right_edge;
|
||||
logic [12:0] rb_addr;
|
||||
logic [12:0] rc_addr;
|
||||
logic [12:0] rd_addr;
|
||||
logic effective_top_row_active;
|
||||
|
||||
// Previous-line samples read from the selected bank.
|
||||
logic [PIX_WIDTH-1:0] prev_Rb;
|
||||
logic [PIX_WIDTH-1:0] prev_Rc;
|
||||
logic [PIX_WIDTH-1:0] prev_Rd;
|
||||
|
||||
// Neighborhood values for the accepted pixel.
|
||||
logic [PIX_WIDTH-1:0] Ra_next;
|
||||
logic [PIX_WIDTH-1:0] Rb_next;
|
||||
logic [PIX_WIDTH-1:0] Rc_next;
|
||||
logic [PIX_WIDTH-1:0] Rd_next;
|
||||
|
||||
// Lossless commit path. On the first pixel of a strip, the read bank is
|
||||
// reset to bank0 and the current row writes to bank1; accept_write_bank makes
|
||||
// that same-cycle choice explicit rather than using the old read_bank value.
|
||||
logic accept_pixel_needs_recon;
|
||||
logic accept_pixel_fast_commit;
|
||||
logic accept_write_bank;
|
||||
logic [PIX_WIDTH-1:0] row_left_Rb_next;
|
||||
logic line_write_valid;
|
||||
logic line_write_bank;
|
||||
logic [12:0] line_write_addr;
|
||||
logic [PIX_WIDTH-1:0] line_write_sample;
|
||||
|
||||
// Handshake terms.
|
||||
logic neigh_slot_open;
|
||||
logic accept_pixel;
|
||||
logic accept_recon;
|
||||
logic accept_recon_write;
|
||||
logic recon_is_row_last;
|
||||
logic [12:0] recon_x_plus_one;
|
||||
logic recon_bypass_not_row_last;
|
||||
logic recon_bypass_strip_ok;
|
||||
logic recon_bypass_x_matches;
|
||||
logic recon_bypass_y_matches;
|
||||
logic same_row_recon_bypass_ready;
|
||||
|
||||
always_comb begin
|
||||
write_bank = ~read_bank;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
effective_top_row_active = top_row_active;
|
||||
if (strip_first_pixel) begin
|
||||
effective_top_row_active = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
pixel_x_is_left_edge = 1'b0;
|
||||
if (pixel_x == 13'd0) begin
|
||||
pixel_x_is_left_edge = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
pixel_x_is_right_edge = pixel_row_last;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
rb_addr = pixel_x;
|
||||
rc_addr = pixel_x;
|
||||
rd_addr = pixel_x;
|
||||
|
||||
if (!pixel_x_is_left_edge) begin
|
||||
rc_addr = pixel_x - 13'd1;
|
||||
end
|
||||
|
||||
if (!pixel_x_is_right_edge) begin
|
||||
rd_addr = pixel_x + 13'd1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
prev_Rb = {PIX_WIDTH{1'b0}};
|
||||
prev_Rc = {PIX_WIDTH{1'b0}};
|
||||
prev_Rd = {PIX_WIDTH{1'b0}};
|
||||
|
||||
if (!effective_top_row_active) begin
|
||||
case (read_bank)
|
||||
1'b0: begin
|
||||
prev_Rb = line_bank0[rb_addr];
|
||||
prev_Rc = line_bank0[rc_addr];
|
||||
prev_Rd = line_bank0[rd_addr];
|
||||
end
|
||||
|
||||
default: begin
|
||||
prev_Rb = line_bank1[rb_addr];
|
||||
prev_Rc = line_bank1[rc_addr];
|
||||
prev_Rd = line_bank1[rd_addr];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
left_Ra_for_pixel = left_Ra;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
Ra_next = left_Ra_for_pixel;
|
||||
if (pixel_x_is_left_edge) begin
|
||||
Ra_next = prev_Rb;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
Rb_next = prev_Rb;
|
||||
Rc_next = prev_Rc;
|
||||
Rd_next = prev_Rd;
|
||||
|
||||
if (pixel_x_is_left_edge) begin
|
||||
Rc_next = left_edge_Rc;
|
||||
if (effective_top_row_active) begin
|
||||
Rc_next = {PIX_WIDTH{1'b0}};
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
accept_pixel_needs_recon = 1'b1;
|
||||
if (lossless_fast_mode) begin
|
||||
accept_pixel_needs_recon = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
accept_pixel_fast_commit = 1'b0;
|
||||
if (accept_pixel && lossless_fast_mode) begin
|
||||
accept_pixel_fast_commit = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
accept_write_bank = write_bank;
|
||||
if (strip_first_pixel) begin
|
||||
accept_write_bank = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
row_left_Rb_next = row_left_Rb;
|
||||
if (strip_first_pixel) begin
|
||||
row_left_Rb_next = {PIX_WIDTH{1'b0}};
|
||||
end else if (pixel_x_is_left_edge) begin
|
||||
row_left_Rb_next = prev_Rb;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
// One synthesized write port for the current-row line history. Lossless
|
||||
// fast mode writes X immediately; NEAR>0 writes the returned Rx. The two
|
||||
// cases are mutually exclusive, but muxing them here keeps Vivado from
|
||||
// seeing two unrelated write patterns for the same line-bank memories.
|
||||
line_write_valid = 1'b0;
|
||||
line_write_bank = accept_write_bank;
|
||||
line_write_addr = pixel_x;
|
||||
line_write_sample = pixel_sample;
|
||||
|
||||
if (accept_pixel_fast_commit) begin
|
||||
line_write_valid = 1'b1;
|
||||
end
|
||||
|
||||
if (accept_recon_write) begin
|
||||
line_write_valid = 1'b1;
|
||||
line_write_bank = write_bank;
|
||||
line_write_addr = recon_x;
|
||||
line_write_sample = recon_sample;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
neigh_slot_open = 1'b0;
|
||||
if (!neigh_valid || neigh_ready) begin
|
||||
neigh_slot_open = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
pixel_ready = 1'b0;
|
||||
if (neigh_slot_open && !waiting_reconstruct) begin
|
||||
pixel_ready = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
accept_pixel = 1'b0;
|
||||
if (pixel_valid && pixel_ready) begin
|
||||
accept_pixel = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
recon_ready = waiting_reconstruct;
|
||||
if (lossless_fast_mode && !waiting_reconstruct) begin
|
||||
recon_ready = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
accept_recon = 1'b0;
|
||||
if (recon_valid && recon_ready) begin
|
||||
accept_recon = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
accept_recon_write = 1'b0;
|
||||
if (accept_recon && waiting_reconstruct) begin
|
||||
accept_recon_write = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
// ---------------------------------------------------------------------------
// Decode terms for the same-row reconstruction bypass condition.
// ---------------------------------------------------------------------------

// The outstanding pixel's row-last flag, and its negation as a bypass term.
always_comb begin
  recon_is_row_last = outstanding_row_last;

  if (recon_is_row_last) begin
    recon_bypass_not_row_last = 1'b0;
  end else begin
    recon_bypass_not_row_last = 1'b1;
  end
end

// Column that follows the reconstructed pixel.
always_comb begin
  recon_x_plus_one = recon_x + 13'd1;
end

// The incoming pixel must not be the first pixel of a strip.
always_comb begin
  if (strip_first_pixel) begin
    recon_bypass_strip_ok = 1'b0;
  end else begin
    recon_bypass_strip_ok = 1'b1;
  end
end

// Coordinate matches: incoming pixel is the right-hand neighbor of the
// reconstructed pixel and lies on the same row.
always_comb begin
  if (recon_x_plus_one == pixel_x) begin
    recon_bypass_x_matches = 1'b1;
  end else begin
    recon_bypass_x_matches = 1'b0;
  end

  if (recon_y == pixel_y) begin
    recon_bypass_y_matches = 1'b1;
  end else begin
    recon_bypass_y_matches = 1'b0;
  end
end

// Diagnostic decode for the previous same-row bypass condition. The timing
// path now waits one clock after Rx writeback instead of using this
// condition in pixel_ready.
always_comb begin
  if (accept_recon_write &&
      recon_bypass_not_row_last &&
      recon_bypass_strip_ok &&
      recon_bypass_x_matches &&
      recon_bypass_y_matches) begin
    same_row_recon_bypass_ready = 1'b1;
  end else begin
    same_row_recon_bypass_ready = 1'b0;
  end
end
|
||||
|
||||
// Sequential state of the line-buffer stage: output register, outstanding
// reconstruction tracking, left/edge neighbor registers, bank selection and
// the two line memories. Within the non-reset branch the statement order is
// significant: a later nonblocking assignment to the same register overrides
// an earlier one issued in the same clock.
always_ff @(posedge clk) begin
  if (rst) begin
    // Bank and row bookkeeping.
    read_bank      <= 1'b0;
    top_row_active <= 1'b1;

    // Outstanding-reconstruction tracking.
    waiting_reconstruct  <= 1'b0;
    outstanding_x        <= '0;
    outstanding_y        <= '0;
    outstanding_row_last <= 1'b0;

    // Running left / edge neighbor registers.
    left_Ra      <= '0;
    left_edge_Rc <= '0;
    row_left_Rb  <= '0;

    // Output register towards the predictor.
    neigh_valid             <= 1'b0;
    neigh_sample            <= '0;
    neigh_x                 <= '0;
    neigh_y                 <= '0;
    neigh_strip_first_pixel <= 1'b0;
    neigh_strip_last_pixel  <= 1'b0;
    Ra                      <= '0;
    Rb                      <= '0;
    Rc                      <= '0;
    Rd                      <= '0;
  end else begin
    // Output register drained with no replacement arriving.
    if (neigh_valid && neigh_ready && !accept_pixel) begin
      neigh_valid <= 1'b0;
    end

    if (accept_pixel) begin
      // Capture the pixel and its neighborhood into the output register.
      neigh_valid             <= 1'b1;
      neigh_sample            <= pixel_sample;
      neigh_x                 <= pixel_x;
      neigh_y                 <= pixel_y;
      neigh_strip_first_pixel <= strip_first_pixel;
      neigh_strip_last_pixel  <= strip_last_pixel;
      Ra                      <= Ra_next;
      Rb                      <= Rb_next;
      Rc                      <= Rc_next;
      Rd                      <= Rd_next;

      // Remember which pixel a later reconstruction return belongs to.
      waiting_reconstruct  <= accept_pixel_needs_recon;
      outstanding_x        <= pixel_x;
      outstanding_y        <= pixel_y;
      outstanding_row_last <= pixel_x_is_right_edge;

      // A strip's first pixel re-initialises the row state; otherwise a
      // left-edge pixel latches prev_Rb.
      if (strip_first_pixel) begin
        top_row_active <= 1'b1;
        read_bank      <= 1'b0;
        left_Ra        <= '0;
        left_edge_Rc   <= '0;
        row_left_Rb    <= '0;
      end else if (pixel_x_is_left_edge) begin
        row_left_Rb <= prev_Rb;
      end

      // Fast commit: the input sample itself becomes the left neighbor.
      // These assignments intentionally follow (and override) the ones above.
      if (accept_pixel_fast_commit) begin
        left_Ra     <= pixel_sample;
        row_left_Rb <= row_left_Rb_next;

        // End of row: swap the read bank and restart the left-neighbor chain.
        if (pixel_x_is_right_edge) begin
          read_bank      <= accept_write_bank;
          left_Ra        <= '0;
          left_edge_Rc   <= row_left_Rb_next;
          top_row_active <= 1'b0;
        end
      end
    end

    // Reconstruction writeback for the outstanding pixel. Placed after the
    // accept_pixel section so its assignments take priority on a shared edge.
    if (accept_recon_write) begin
      left_Ra <= recon_sample;

      // Keep the value written by accept_pixel if a pixel was accepted too.
      if (!accept_pixel) begin
        waiting_reconstruct <= 1'b0;
      end

      // End of row: swap the read bank and restart the left-neighbor chain.
      if (recon_is_row_last) begin
        read_bank      <= write_bank;
        left_Ra        <= '0;
        left_edge_Rc   <= row_left_Rb;
        top_row_active <= 1'b0;
      end
    end

    // Line-memory write port: bank 0 when line_write_bank is 0, else bank 1.
    if (line_write_valid) begin
      if (line_write_bank == 1'b0) begin
        line_bank0[line_write_addr] <= line_write_sample;
      end else begin
        line_bank1[line_write_addr] <= line_write_sample;
      end
    end
  end
end
|
||||
|
||||
endmodule
|
||||
|
||||
`default_nettype wire
|
||||
218
fpga/verilog/jls_output_buffer.sv
Normal file
218
fpga/verilog/jls_output_buffer.sv
Normal file
@@ -0,0 +1,218 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.1-C.4 marker stream byte order
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Encoded byte stream delivery after JPEG-LS bit packing
|
||||
// Trace : docs/jls_traceability.md#jls-output-buffer
|
||||
// Example : A byte event {start=1, byte=8'hFF} becomes ofifo_wdata=9'h1FF.
|
||||
//
|
||||
// Internal output buffer for the 9-bit output FIFO interface. The external
|
||||
// ofifo_full/ofifo_alfull inputs are intentionally ignored by RTL behavior per
|
||||
// the SRS; simulation reports an error if a write happens while ofifo_full=1.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_output_buffer #(
  // Internal output-buffer capacity in bytes. Default comes from the SRS.
  // Must be at least 2 so the circular pointers have a non-zero width.
  parameter int OUT_BUF_BYTES = 8192,

  // Input-pause margin in bytes. pause_req asserts when occupancy reaches
  // OUT_BUF_BYTES - OUT_BUF_AFULL_MARGIN. Must lie in 0..OUT_BUF_BYTES,
  // otherwise the pause threshold wraps around.
  parameter int OUT_BUF_AFULL_MARGIN = 256
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous active-high reset.
  input var logic rst,

  // Encoded byte event from header writer or bit packer.
  input var logic byte_valid,

  // This buffer can accept the encoded byte event (high whenever not full).
  output logic byte_ready,

  // JPEG-LS byte in marker-stream order.
  input var logic [7:0] byte_data,

  // Sideband copied to ofifo_wdata[8] for the original-image first byte only.
  input var logic original_image_start,

  // Accepted byte event pulse for statistics and dynamic NEAR accounting.
  output logic byte_accepted,

  // Internal pause request for upstream pipeline throttling.
  output logic pause_req,

  // Current buffer occupancy in bytes, used by verification reports.
  output logic [$clog2(OUT_BUF_BYTES + 1)-1:0] buffer_level,

  // Output FIFO write clock, same frequency and phase as clk.
  output logic ofifo_wclk,

  // Output FIFO write enable (registered; one clock after the pop decision).
  output logic ofifo_wr,

  // Output FIFO data. Bit 8 marks original-image start; bits 7:0 carry bytes.
  output logic [8:0] ofifo_wdata,

  // Reserved output FIFO full flag. RTL ignores this input for flow control.
  input var logic ofifo_full,

  // Reserved output FIFO almost-full flag. RTL ignores this input.
  input var logic ofifo_alfull
);

  // Pointer width for the circular byte buffer.
  localparam int PTR_WIDTH = $clog2(OUT_BUF_BYTES);

  // Occupancy counter width. It must represent OUT_BUF_BYTES exactly.
  localparam int COUNT_WIDTH = $clog2(OUT_BUF_BYTES + 1);

  // Last legal circular-buffer pointer value.
  localparam logic [PTR_WIDTH-1:0] PTR_LAST_VALUE = OUT_BUF_BYTES - 1;

  // Buffer capacity and near-full threshold as sized constants. The pause
  // level is derived from the two sized constants above it.
  localparam logic [COUNT_WIDTH-1:0] OUT_BUF_BYTES_VALUE = OUT_BUF_BYTES;
  localparam logic [COUNT_WIDTH-1:0] AFULL_MARGIN_VALUE  = OUT_BUF_AFULL_MARGIN;
  localparam logic [COUNT_WIDTH-1:0] PAUSE_LEVEL_VALUE   =
      OUT_BUF_BYTES_VALUE - AFULL_MARGIN_VALUE;

  // Simulation-only parameter legality check. Illegal values previously
  // elaborated silently into a zero-width pointer or a wrapped threshold.
  // synthesis translate_off
  initial begin
    if (OUT_BUF_BYTES < 2) begin
      $fatal(1, "jls_output_buffer: OUT_BUF_BYTES=%0d must be at least 2",
             OUT_BUF_BYTES);
    end
    if ((OUT_BUF_AFULL_MARGIN < 0) || (OUT_BUF_AFULL_MARGIN > OUT_BUF_BYTES)) begin
      $fatal(1, "jls_output_buffer: OUT_BUF_AFULL_MARGIN=%0d must be in 0..%0d",
             OUT_BUF_AFULL_MARGIN, OUT_BUF_BYTES);
    end
  end
  // synthesis translate_on

  // Circular storage. Bit 8 is original_image_start, bits 7:0 are stream byte.
  logic [8:0] buffer_mem [0:OUT_BUF_BYTES-1];

  // Circular write and read pointers.
  logic [PTR_WIDTH-1:0] write_ptr;
  logic [PTR_WIDTH-1:0] read_ptr;
  logic [PTR_WIDTH-1:0] write_ptr_next;
  logic [PTR_WIDTH-1:0] read_ptr_next;

  // Occupancy and status flags.
  logic [COUNT_WIDTH-1:0] occupancy_count;
  logic buffer_empty;
  logic buffer_full;
  logic push_byte;
  logic pop_byte;

  // Packed byte event stored in the internal buffer.
  logic [8:0] buffer_write_word;

  // Reserved input observation signals keep intent explicit without changing
  // flow control behavior. Both external FIFO flags are treated alike.
  // NOTE(review): no check on writes while ofifo_full=1 is visible in this
  // module; presumably the testbench implements the simulation error the
  // file header describes - confirm.
  logic ofifo_full_ignored;
  logic ofifo_alfull_ignored;

  assign ofifo_wclk = clk;

  always_comb begin
    buffer_level = occupancy_count;
  end

  always_comb begin
    buffer_write_word = {original_image_start, byte_data};
  end

  always_comb begin
    buffer_empty = 1'b0;
    if (occupancy_count == {COUNT_WIDTH{1'b0}}) begin
      buffer_empty = 1'b1;
    end
  end

  always_comb begin
    buffer_full = 1'b0;
    if (occupancy_count == OUT_BUF_BYTES_VALUE) begin
      buffer_full = 1'b1;
    end
  end

  // Input side is ready whenever the buffer is not full. A pop in the same
  // cycle does not open a slot early.
  always_comb begin
    byte_ready = 1'b0;
    if (!buffer_full) begin
      byte_ready = 1'b1;
    end
  end

  always_comb begin
    byte_accepted = 1'b0;
    if (byte_valid && byte_ready) begin
      byte_accepted = 1'b1;
    end
  end

  // The buffer drains one byte per clock whenever it holds data; the
  // external FIFO flags do not gate this.
  always_comb begin
    pop_byte = 1'b0;
    if (!buffer_empty) begin
      pop_byte = 1'b1;
    end
  end

  always_comb begin
    push_byte = byte_accepted;
  end

  always_comb begin
    pause_req = 1'b0;
    if (occupancy_count >= PAUSE_LEVEL_VALUE) begin
      pause_req = 1'b1;
    end
  end

  // Pointer increment with explicit wrap so non-power-of-two capacities work.
  always_comb begin
    write_ptr_next = write_ptr + {{(PTR_WIDTH-1){1'b0}}, 1'b1};
    if (write_ptr == PTR_LAST_VALUE) begin
      write_ptr_next = {PTR_WIDTH{1'b0}};
    end
  end

  always_comb begin
    read_ptr_next = read_ptr + {{(PTR_WIDTH-1){1'b0}}, 1'b1};
    if (read_ptr == PTR_LAST_VALUE) begin
      read_ptr_next = {PTR_WIDTH{1'b0}};
    end
  end

  always_comb begin
    ofifo_full_ignored   = ofifo_full;
    ofifo_alfull_ignored = ofifo_alfull;
  end

  always_ff @(posedge clk) begin
    if (rst) begin
      write_ptr       <= {PTR_WIDTH{1'b0}};
      read_ptr        <= {PTR_WIDTH{1'b0}};
      occupancy_count <= {COUNT_WIDTH{1'b0}};
      ofifo_wr        <= 1'b0;
      ofifo_wdata     <= 9'd0;
    end else begin
      // Registered output: write enable and data move together.
      ofifo_wr <= pop_byte;

      if (pop_byte) begin
        ofifo_wdata <= buffer_mem[read_ptr];
        read_ptr    <= read_ptr_next;
      end else begin
        ofifo_wdata <= 9'd0;
      end

      if (push_byte) begin
        buffer_mem[write_ptr] <= buffer_write_word;
        write_ptr             <= write_ptr_next;
      end

      // Occupancy changes only when exactly one of push/pop happens.
      case ({push_byte, pop_byte})
        2'b10: begin
          occupancy_count <= occupancy_count + {{(COUNT_WIDTH-1){1'b0}}, 1'b1};
        end

        2'b01: begin
          occupancy_count <= occupancy_count - {{(COUNT_WIDTH-1){1'b0}}, 1'b1};
        end

        default: begin
          occupancy_count <= occupancy_count;
        end
      endcase
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
196
fpga/verilog/jls_prediction_corrector.sv
Normal file
196
fpga/verilog/jls_prediction_corrector.sv
Normal file
@@ -0,0 +1,196 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 prediction error encoding, Annex A.6 bias variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Px correction by C[Q] followed by bounds correction
|
||||
// Trace : docs/jls_traceability.md#regular-prediction-correction
|
||||
// Example : Px=20,C=-3,negative_context=0 gives corrected_Px=17.
|
||||
//
|
||||
// Registered prediction correction stage. It applies context sign to C[Q],
|
||||
// adds the result to the MED prediction Px, and clamps the prediction to
|
||||
// 0..MAXVAL like the JPEG-LS correct_prediction operation.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_prediction_corrector #(
  // Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous active-high reset.
  input var logic rst,

  // Input context event handshake: valid from upstream, ready from this stage.
  input var logic context_valid,
  output logic context_ready,

  // Original input sample X, forwarded unchanged.
  input var logic [PIX_WIDTH-1:0] context_sample,

  // Pixel coordinates, forwarded unchanged.
  input var logic [12:0] context_x,
  input var logic [12:0] context_y,

  // Strip boundary flags, forwarded unchanged.
  input var logic context_strip_first_pixel,
  input var logic context_strip_last_pixel,

  // JPEG-LS MED prediction value Px.
  input var logic [PIX_WIDTH-1:0] Px,

  // Context variables. Only C (bias correction C[Q], documented range
  // -128..127) takes part in the correction; A, B and N are registered and
  // forwarded unchanged.
  input var logic [31:0] A,
  input var logic signed [31:0] B,
  input var logic signed [8:0] C,
  input var logic [15:0] N,

  // High when the quantized context sign is negative; C is negated then.
  input var logic context_negative,

  // Context index and run-mode flag, forwarded for later stages.
  input var logic [8:0] context_index,
  input var logic run_mode_context,

  // Output event handshake: valid from this stage, ready from downstream.
  output logic corrected_valid,
  input var logic corrected_ready,

  // Forwarded original input sample X.
  output logic [PIX_WIDTH-1:0] corrected_sample,

  // Forwarded pixel coordinates.
  output logic [12:0] corrected_x,
  output logic [12:0] corrected_y,

  // Forwarded strip boundary flags.
  output logic corrected_strip_first_pixel,
  output logic corrected_strip_last_pixel,

  // Corrected prediction, clamped to 0..MAXVAL.
  output logic [PIX_WIDTH-1:0] corrected_Px,

  // Forwarded context metadata.
  output logic [8:0] corrected_context_index,
  output logic corrected_context_negative,
  output logic corrected_run_mode_context,

  // Forwarded pre-update context variables for jls_context_update.
  output logic [31:0] corrected_A,
  output logic signed [31:0] corrected_B,
  output logic signed [8:0] corrected_C,
  output logic [15:0] corrected_N
);

  // MAXVAL = 2^PIX_WIDTH - 1 in the 19-bit signed working width, which
  // covers a 16-bit MAXVAL plus the C[Q] range.
  localparam logic signed [18:0] MAXVAL_VALUE = (19'sd1 <<< PIX_WIDTH) - 19'sd1;

  // Correction datapath terms.
  logic signed [18:0] C_ext;
  logic signed [18:0] signed_C;
  logic signed [18:0] Px_ext;
  logic signed [18:0] prediction_sum;
  logic [PIX_WIDTH-1:0] corrected_Px_next;

  // Handshake terms.
  logic slot_open;
  logic accept_context;

  // Correction datapath, evaluated top to bottom in one block:
  // sign-extend C, apply the context sign, add to Px, clamp to 0..MAXVAL.
  always_comb begin
    C_ext = {{10{C[8]}}, C};

    if (context_negative) begin
      signed_C = -C_ext;
    end else begin
      signed_C = C_ext;
    end

    Px_ext         = $signed({3'd0, Px});
    prediction_sum = Px_ext + signed_C;

    if (prediction_sum < 19'sd0) begin
      corrected_Px_next = {PIX_WIDTH{1'b0}};
    end else if (prediction_sum > MAXVAL_VALUE) begin
      corrected_Px_next = MAXVAL_VALUE[PIX_WIDTH-1:0];
    end else begin
      corrected_Px_next = prediction_sum[PIX_WIDTH-1:0];
    end
  end

  // Handshake: the single output register can be loaded when it is empty or
  // is being read by the downstream stage in this cycle.
  always_comb begin
    if (corrected_ready || !corrected_valid) begin
      slot_open = 1'b1;
    end else begin
      slot_open = 1'b0;
    end

    context_ready = slot_open;

    if (context_ready && context_valid) begin
      accept_context = 1'b1;
    end else begin
      accept_context = 1'b0;
    end
  end

  // Output register.
  always_ff @(posedge clk) begin
    if (rst) begin
      corrected_valid             <= 1'b0;
      corrected_sample            <= '0;
      corrected_x                 <= '0;
      corrected_y                 <= '0;
      corrected_strip_first_pixel <= 1'b0;
      corrected_strip_last_pixel  <= 1'b0;
      corrected_Px                <= '0;
      corrected_context_index     <= '0;
      corrected_context_negative  <= 1'b0;
      corrected_run_mode_context  <= 1'b0;
      corrected_A                 <= '0;
      corrected_B                 <= '0;
      corrected_C                 <= '0;
      corrected_N                 <= '0;
    end else begin
      // Drained without a replacement: the register becomes empty.
      if (corrected_valid && corrected_ready && !accept_context) begin
        corrected_valid <= 1'b0;
      end

      // New event: capture the corrected prediction and all sidebands.
      if (accept_context) begin
        corrected_valid             <= 1'b1;
        corrected_Px                <= corrected_Px_next;
        corrected_sample            <= context_sample;
        corrected_x                 <= context_x;
        corrected_y                 <= context_y;
        corrected_strip_first_pixel <= context_strip_first_pixel;
        corrected_strip_last_pixel  <= context_strip_last_pixel;
        corrected_context_index     <= context_index;
        corrected_context_negative  <= context_negative;
        corrected_run_mode_context  <= run_mode_context;
        corrected_A                 <= A;
        corrected_B                 <= B;
        corrected_C                 <= C;
        corrected_N                 <= N;
      end
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
273
fpga/verilog/jls_predictor.sv
Normal file
273
fpga/verilog/jls_predictor.sv
Normal file
@@ -0,0 +1,273 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.4 prediction, Annex G.1 regular-mode variables
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : MED predictor / Px calculation from Ra, Rb, and Rc
|
||||
// Trace : docs/jls_traceability.md#med-predictor
|
||||
// Example : If Ra=10, Rb=20, Rc=15, Px=Ra+Rb-Rc=15.
|
||||
//
|
||||
// Registered MED predictor stage. A separate line-buffer stage supplies the
|
||||
// reconstructed neighbors Ra/Rb/Rc/Rd. This split keeps the neighbor memory
|
||||
// path independent from the MED compare/add path for the 250 MHz target.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_predictor #(
  // Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
  parameter int PIX_WIDTH = 16
) (
  // Main 250 MHz clock.
  input var logic clk,

  // Synchronous active-high reset.
  input var logic rst,

  // Input pixel/neighborhood event handshake.
  input var logic pixel_valid,
  output logic pixel_ready,

  // Original input sample X from the standard encoder notation.
  input var logic [PIX_WIDTH-1:0] pixel_sample,

  // Zero-based original-image column and row coordinates.
  input var logic [12:0] pixel_x,
  input var logic [12:0] pixel_y,

  // First / last pixel of the current standalone strip frame.
  input var logic strip_first_pixel,
  input var logic strip_last_pixel,

  // Reconstructed neighbors: left (Ra), above (Rb), above-left (Rc) and
  // above-right (Rd). Rd is only forwarded for context gradients.
  input var logic [PIX_WIDTH-1:0] Ra,
  input var logic [PIX_WIDTH-1:0] Rb,
  input var logic [PIX_WIDTH-1:0] Rc,
  input var logic [PIX_WIDTH-1:0] Rd,

  // Predicted event handshake towards the context/error stage.
  output logic predict_valid,
  input var logic predict_ready,

  // Forwarded original input sample X.
  output logic [PIX_WIDTH-1:0] predict_sample,

  // Forwarded pixel coordinates.
  output logic [12:0] predict_x,
  output logic [12:0] predict_y,

  // Forwarded strip flags (first: strip-local state reset, last: flush).
  output logic predict_strip_first_pixel,
  output logic predict_strip_last_pixel,

  // Forwarded reconstructed neighbors for context quantization.
  output logic [PIX_WIDTH-1:0] predict_Ra,
  output logic [PIX_WIDTH-1:0] predict_Rb,
  output logic [PIX_WIDTH-1:0] predict_Rc,
  output logic [PIX_WIDTH-1:0] predict_Rd,

  // JPEG-LS MED prediction value Px.
  output logic [PIX_WIDTH-1:0] Px
);

  // Neighbors widened by one bit so Ra+Rb-Rc cannot overflow.
  logic [PIX_WIDTH:0] ra_ext;
  logic [PIX_WIDTH:0] rb_ext;
  logic [PIX_WIDTH:0] rc_ext;
  logic [PIX_WIDTH:0] neighbor_min_ext;
  logic [PIX_WIDTH:0] neighbor_max_ext;
  logic [PIX_WIDTH:0] med_sum_ext;
  logic [PIX_WIDTH-1:0] med_selected;

  // Comparison terms of the MED decision and queue handshake terms.
  logic ra_ge_rb;
  logic rc_ge_neighbor_max;
  logic rc_le_neighbor_min;
  logic output_queue_full;
  logic accept_pixel;
  logic output_accept;

  // Second queue slot. The public predict_* registers form the head of a
  // two-entry queue; predict_next_* holds the following event while the
  // downstream stage is stalled, so pixel_ready does not depend
  // combinationally on predict_ready.
  logic predict_next_valid;
  logic [PIX_WIDTH-1:0] predict_next_sample;
  logic [12:0] predict_next_x;
  logic [12:0] predict_next_y;
  logic predict_next_strip_first_pixel;
  logic predict_next_strip_last_pixel;
  logic [PIX_WIDTH-1:0] predict_next_Ra;
  logic [PIX_WIDTH-1:0] predict_next_Rb;
  logic [PIX_WIDTH-1:0] predict_next_Rc;
  logic [PIX_WIDTH-1:0] predict_next_Rd;
  logic [PIX_WIDTH-1:0] predict_next_Px;

  // MED predictor (Annex A.4):
  //   Rc >= max(Ra,Rb) -> min(Ra,Rb)
  //   Rc <= min(Ra,Rb) -> max(Ra,Rb)
  //   otherwise        -> Ra + Rb - Rc
  always_comb begin
    ra_ext = {1'b0, Ra};
    rb_ext = {1'b0, Rb};
    rc_ext = {1'b0, Rc};

    if (Ra >= Rb) begin
      ra_ge_rb         = 1'b1;
      neighbor_min_ext = rb_ext;
      neighbor_max_ext = ra_ext;
    end else begin
      ra_ge_rb         = 1'b0;
      neighbor_min_ext = ra_ext;
      neighbor_max_ext = rb_ext;
    end

    if (rc_ext >= neighbor_max_ext) begin
      rc_ge_neighbor_max = 1'b1;
    end else begin
      rc_ge_neighbor_max = 1'b0;
    end

    if (rc_ext <= neighbor_min_ext) begin
      rc_le_neighbor_min = 1'b1;
    end else begin
      rc_le_neighbor_min = 1'b0;
    end

    med_sum_ext = ra_ext + rb_ext - rc_ext;

    if (rc_ge_neighbor_max) begin
      med_selected = neighbor_min_ext[PIX_WIDTH-1:0];
    end else if (rc_le_neighbor_min) begin
      med_selected = neighbor_max_ext[PIX_WIDTH-1:0];
    end else begin
      med_selected = med_sum_ext[PIX_WIDTH-1:0];
    end
  end

  // Queue handshake. Input is refused only when both slots are occupied.
  always_comb begin
    if (predict_valid && predict_next_valid) begin
      output_queue_full = 1'b1;
    end else begin
      output_queue_full = 1'b0;
    end

    pixel_ready = !output_queue_full;

    if (pixel_ready && pixel_valid) begin
      accept_pixel = 1'b1;
    end else begin
      accept_pixel = 1'b0;
    end

    if (predict_ready && predict_valid) begin
      output_accept = 1'b1;
    end else begin
      output_accept = 1'b0;
    end
  end

  // Two-entry queue update. The dequeue section comes first and the enqueue
  // section second, so an enqueue into the head overrides the "head emptied"
  // assignment made by a dequeue on the same edge.
  always_ff @(posedge clk) begin
    if (rst) begin
      // Head slot.
      predict_valid             <= 1'b0;
      predict_sample            <= '0;
      predict_x                 <= '0;
      predict_y                 <= '0;
      predict_strip_first_pixel <= 1'b0;
      predict_strip_last_pixel  <= 1'b0;
      predict_Ra                <= '0;
      predict_Rb                <= '0;
      predict_Rc                <= '0;
      predict_Rd                <= '0;
      Px                        <= '0;

      // Second slot.
      predict_next_valid             <= 1'b0;
      predict_next_sample            <= '0;
      predict_next_x                 <= '0;
      predict_next_y                 <= '0;
      predict_next_strip_first_pixel <= 1'b0;
      predict_next_strip_last_pixel  <= 1'b0;
      predict_next_Ra                <= '0;
      predict_next_Rb                <= '0;
      predict_next_Rc                <= '0;
      predict_next_Rd                <= '0;
      predict_next_Px                <= '0;
    end else begin
      // Dequeue: the head was consumed downstream.
      if (output_accept) begin
        if (!predict_next_valid) begin
          // Nothing queued behind it; the head becomes empty.
          predict_valid <= 1'b0;
        end else begin
          // Promote the second slot into the head.
          predict_valid             <= 1'b1;
          predict_next_valid        <= 1'b0;
          predict_sample            <= predict_next_sample;
          predict_x                 <= predict_next_x;
          predict_y                 <= predict_next_y;
          predict_strip_first_pixel <= predict_next_strip_first_pixel;
          predict_strip_last_pixel  <= predict_next_strip_last_pixel;
          predict_Ra                <= predict_next_Ra;
          predict_Rb                <= predict_next_Rb;
          predict_Rc                <= predict_next_Rc;
          predict_Rd                <= predict_next_Rd;
          Px                        <= predict_next_Px;
        end
      end

      // Enqueue: a new pixel event was accepted.
      if (accept_pixel) begin
        if (output_accept || !predict_valid) begin
          // Head is free (or being freed): load it directly.
          predict_valid             <= 1'b1;
          predict_sample            <= pixel_sample;
          predict_x                 <= pixel_x;
          predict_y                 <= pixel_y;
          predict_strip_first_pixel <= strip_first_pixel;
          predict_strip_last_pixel  <= strip_last_pixel;
          predict_Ra                <= Ra;
          predict_Rb                <= Rb;
          predict_Rc                <= Rc;
          predict_Rd                <= Rd;
          Px                        <= med_selected;
        end else begin
          // Head is occupied and stalled: park the event in the second slot.
          predict_next_valid             <= 1'b1;
          predict_next_sample            <= pixel_sample;
          predict_next_x                 <= pixel_x;
          predict_next_y                 <= pixel_y;
          predict_next_strip_first_pixel <= strip_first_pixel;
          predict_next_strip_last_pixel  <= strip_last_pixel;
          predict_next_Ra                <= Ra;
          predict_next_Rb                <= Rb;
          predict_next_Rc                <= Rc;
          predict_next_Rd                <= Rd;
          predict_next_Px                <= med_selected;
        end
      end
    end
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
138
fpga/verilog/jls_preset_defaults.sv
Normal file
138
fpga/verilog/jls_preset_defaults.sv
Normal file
@@ -0,0 +1,138 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex C.2.4.1.1 preset coding parameters
|
||||
// Figure : C.3 clamping function, referenced by default threshold rules
|
||||
// Table : Table C.1 valid preset parameters, Table C.2 RESET, Table C.3 defaults
|
||||
// Pseudocode : Default threshold calculation for MAXVAL >= 128
|
||||
// Trace : docs/jls_traceability.md#jls-preset-defaults
|
||||
// Example : PIX_WIDTH=8, NEAR=0 gives MAXVAL=255, T1=3, T2=7, T3=21.
|
||||
//
|
||||
// JPEG-LS default preset coding parameter helper. The first RTL version only
|
||||
// supports 8/10/12/14/16-bit grayscale samples and NEAR is clamped to 0..31.
|
||||
// For all supported sample precisions MAXVAL >= 128. With NEAR <= 31 the
|
||||
// default thresholds do not hit MAXVAL, so the standard C.2.4.1.1 equations
|
||||
// reduce to shallow shift-add expressions:
|
||||
// T1 = FACTOR * 1 + 2 + 3*NEAR
|
||||
// T2 = FACTOR * 4 + 3 + 5*NEAR
|
||||
// T3 = FACTOR * 17 + 4 + 7*NEAR
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_preset_defaults #(
  // Compile-time grayscale sample precision. Legal project values: 8, 10,
  // 12, 14, 16. Any width in 8..16 yields the standard defaults for
  // MAXVAL = 2^PIX_WIDTH - 1; widths outside 8..16 fall back to the 16-bit
  // defaults.
  parameter int PIX_WIDTH = 16
) (
  // Requested NEAR value. Values above 31 are clamped defensively.
  input var logic [5:0] near,

  // JPEG-LS LSE MAXVAL preset coding parameter.
  output logic [15:0] preset_maxval,

  // JPEG-LS LSE T1 preset coding parameter.
  output logic [15:0] preset_t1,

  // JPEG-LS LSE T2 preset coding parameter.
  output logic [15:0] preset_t2,

  // JPEG-LS LSE T3 preset coding parameter.
  output logic [15:0] preset_t3,

  // JPEG-LS LSE RESET preset coding parameter.
  output logic [15:0] preset_reset
);

  // Default RESET value from T.87 Table C.2.
  localparam logic [15:0] DEFAULT_RESET_VALUE = 16'd64;

  // Elaboration-time evaluation of the C.2.4.1.1 defaults for MAXVAL >= 128:
  //   FACTOR = floor((min(MAXVAL, 4095) + 128) / 256)
  //   T1 = FACTOR * (3 - 2)  + 2 + 3*NEAR
  //   T2 = FACTOR * (7 - 3)  + 3 + 5*NEAR
  //   T3 = FACTOR * (21 - 4) + 4 + 7*NEAR
  // For 8/10/12/14/16 bits this reproduces the former lookup table exactly
  // (3/7/21, 6/19/72, 18/67/276). Deriving MAXVAL from PIX_WIDTH keeps this
  // module consistent with stages that use 2^PIX_WIDTH - 1 directly.
  localparam bit PIX_WIDTH_SUPPORTED = (PIX_WIDTH >= 8) && (PIX_WIDTH <= 16);
  localparam int MAXVAL_INT =
      PIX_WIDTH_SUPPORTED ? ((1 << PIX_WIDTH) - 1) : 65535;
  localparam int FACTOR_INPUT = (MAXVAL_INT < 4095) ? MAXVAL_INT : 4095;
  localparam int FACTOR       = (FACTOR_INPUT + 128) / 256;

  localparam logic [15:0] MAXVAL_VALUE  = MAXVAL_INT;
  localparam logic [15:0] BASE_T1_VALUE = FACTOR + 2;
  localparam logic [15:0] BASE_T2_VALUE = (4 * FACTOR) + 3;
  localparam logic [15:0] BASE_T3_VALUE = (17 * FACTOR) + 4;

  // Defensive NEAR clamp for the project maximum.
  logic [5:0] near_clamped;

  // Shift-add terms for 3*NEAR, 5*NEAR, and 7*NEAR.
  logic [15:0] near_ext;
  logic [15:0] near_times_2;
  logic [15:0] near_times_3;
  logic [15:0] near_times_4;
  logic [15:0] near_times_5;
  logic [15:0] near_times_7;

  // Base threshold values after applying the standard FACTOR term.
  logic [15:0] base_t1;
  logic [15:0] base_t2;
  logic [15:0] base_t3;

  always_comb begin
    near_clamped = near;
    if (near > 6'd31) begin
      near_clamped = 6'd31;
    end
  end

  always_comb begin
    near_ext     = {10'd0, near_clamped};
    near_times_2 = {near_ext[14:0], 1'b0};
    near_times_3 = near_times_2 + near_ext;
    near_times_4 = {near_ext[13:0], 2'b00};
    near_times_5 = near_times_4 + near_ext;
    near_times_7 = near_times_4 + near_times_2 + near_ext;
  end

  // Constant drivers; kept as signals so the internal names stay observable.
  always_comb begin
    preset_maxval = MAXVAL_VALUE;
    base_t1       = BASE_T1_VALUE;
    base_t2       = BASE_T2_VALUE;
    base_t3       = BASE_T3_VALUE;
  end

  // With NEAR <= 31 and MAXVAL >= 255 the sums stay below MAXVAL (worst case
  // T3 = 21 + 217 = 238 at 8 bits), so the standard CLAMP is never active.
  always_comb begin
    preset_t1    = base_t1 + near_times_3;
    preset_t2    = base_t2 + near_times_5;
    preset_t3    = base_t3 + near_times_7;
    preset_reset = DEFAULT_RESET_VALUE;
  end

endmodule
|
||||
|
||||
`default_nettype wire
|
||||
805
fpga/verilog/jls_regular_error_quantizer.sv
Normal file
805
fpga/verilog/jls_regular_error_quantizer.sv
Normal file
@@ -0,0 +1,805 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 prediction error encoding, Annex A.2 RANGE
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Errval quantization/modulo and reconstructed sample computation
|
||||
// Trace : docs/jls_traceability.md#regular-error-quantization
|
||||
// Example : X=24, Px=20, NEAR=1 gives Errval=1 and Rx=23.
|
||||
//
|
||||
// Regular-mode error quantizer and reconstructed-sample calculator. NEAR>0
|
||||
// uses an exact reciprocal-LUT multiply and correction pipeline: one cycle for
|
||||
// the reciprocal multiply, one cycle for the quotient correction, then the
|
||||
// standard Annex A.5 modulo/reconstruction result. This avoids a large
|
||||
// combinational divider while reducing the earlier one-bit-per-cycle latency.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_regular_error_quantizer #(
|
||||
// Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
|
||||
parameter int PIX_WIDTH = 16
|
||||
) (
|
||||
// Main 250 MHz clock.
|
||||
input var logic clk,
|
||||
|
||||
// Synchronous active-high reset.
|
||||
input var logic rst,
|
||||
|
||||
// Corrected prediction event is valid.
|
||||
input var logic corrected_valid,
|
||||
|
||||
// This stage can accept the current event.
|
||||
output logic corrected_ready,
|
||||
|
||||
// Original input sample X.
|
||||
input var logic [PIX_WIDTH-1:0] corrected_sample,
|
||||
|
||||
// Pixel coordinate forwarded for line-buffer writeback/reporting.
|
||||
input var logic [12:0] corrected_x,
|
||||
input var logic [12:0] corrected_y,
|
||||
|
||||
// Strip boundary flags forwarded with the result.
|
||||
input var logic corrected_strip_first_pixel,
|
||||
input var logic corrected_strip_last_pixel,
|
||||
|
||||
// Corrected prediction value after C[Q] and bounds correction.
|
||||
input var logic [PIX_WIDTH-1:0] corrected_Px,
|
||||
|
||||
// Forwarded context metadata.
|
||||
input var logic [8:0] corrected_context_index,
|
||||
input var logic corrected_context_negative,
|
||||
input var logic corrected_run_mode_context,
|
||||
|
||||
// Pre-update context variables forwarded from jls_context_model.
|
||||
input var logic [31:0] corrected_A,
|
||||
input var logic signed [31:0] corrected_B,
|
||||
input var logic signed [8:0] corrected_C,
|
||||
input var logic [15:0] corrected_N,
|
||||
|
||||
// Coding parameters for the current strip frame.
|
||||
input var logic [16:0] RANGE,
|
||||
input var logic [4:0] qbpp,
|
||||
input var logic [6:0] LIMIT,
|
||||
input var logic [5:0] NEAR,
|
||||
|
||||
// Quantized error event is valid.
|
||||
output logic err_valid,
|
||||
|
||||
// Downstream context-update stage accepted the event.
|
||||
input var logic err_ready,
|
||||
|
||||
// Quantized signed prediction error Errval after context sign handling.
|
||||
output logic signed [31:0] Errval,
|
||||
|
||||
// Reconstructed sample Rx used by the line-buffer stage.
|
||||
output logic [PIX_WIDTH-1:0] reconstructed_sample,
|
||||
|
||||
// Forwarded coordinate and strip flags.
|
||||
output logic [12:0] err_x,
|
||||
output logic [12:0] err_y,
|
||||
output logic err_strip_first_pixel,
|
||||
output logic err_strip_last_pixel,
|
||||
|
||||
// Forwarded context and coding metadata.
|
||||
output logic [8:0] err_context_index,
|
||||
output logic err_context_negative,
|
||||
output logic err_run_mode_context,
|
||||
output logic [4:0] err_qbpp,
|
||||
output logic [6:0] err_LIMIT,
|
||||
|
||||
// Forwarded pre-update context variables for jls_context_update.
|
||||
output logic [31:0] err_A,
|
||||
output logic signed [31:0] err_B,
|
||||
output logic signed [8:0] err_C,
|
||||
output logic [15:0] err_N
|
||||
);
|
||||
|
||||
// Divider width covers max numerator MAXVAL + NEAR.
|
||||
localparam int DIV_WIDTH = PIX_WIDTH + 1;
|
||||
localparam int RECIP_SHIFT = 24;
|
||||
localparam int RECIP_MAGIC_WIDTH = 23;
|
||||
localparam int RECIP_PRODUCT_WIDTH = DIV_WIDTH + RECIP_MAGIC_WIDTH;
|
||||
localparam int RECIP_CHECK_WIDTH = DIV_WIDTH + 6;
|
||||
|
||||
// State for the exact reciprocal-LUT division pipeline when NEAR > 0.
|
||||
typedef enum logic [3:0] {
|
||||
STATE_IDLE = 4'd0,
|
||||
STATE_DIV_MUL = 4'd1,
|
||||
STATE_DIV_CHECK = 4'd2,
|
||||
STATE_DIV_CORRECT = 4'd3,
|
||||
STATE_ERRVAL = 4'd4,
|
||||
STATE_RECON_MUL = 4'd5,
|
||||
STATE_RECON_SUM = 4'd6,
|
||||
STATE_RECON_CALC = 4'd7,
|
||||
STATE_RECON_CLAMP = 4'd8,
|
||||
STATE_FINISH = 4'd9,
|
||||
STATE_INPUT_PREP = 4'd10,
|
||||
STATE_ERRVAL_SIGN = 4'd11,
|
||||
STATE_NUMERATOR_PREP = 4'd12,
|
||||
STATE_RECON_FACTORS = 4'd13,
|
||||
STATE_ERRVAL_PREP = 4'd14
|
||||
} quant_state_e;
|
||||
|
||||
// One-hot state decode keeps per-stage enables shallow. This is important
|
||||
// when explicit timing-boundary registers below are preserved for 250 MHz.
|
||||
(* fsm_encoding = "one_hot" *) quant_state_e state;
|
||||
|
||||
// Latched event fields.
|
||||
logic [PIX_WIDTH-1:0] sample_latched;
|
||||
logic [PIX_WIDTH-1:0] Px_latched;
|
||||
logic [12:0] x_latched;
|
||||
logic [12:0] y_latched;
|
||||
logic strip_first_latched;
|
||||
logic strip_last_latched;
|
||||
logic [8:0] context_index_latched;
|
||||
logic context_negative_latched;
|
||||
logic run_mode_latched;
|
||||
logic [31:0] A_latched;
|
||||
logic signed [31:0] B_latched;
|
||||
logic signed [8:0] C_latched;
|
||||
logic [15:0] N_latched;
|
||||
logic [16:0] RANGE_latched;
|
||||
logic [4:0] qbpp_latched;
|
||||
logic [6:0] LIMIT_latched;
|
||||
logic [5:0] NEAR_latched;
|
||||
logic signed [32:0] oriented_error_latched;
|
||||
logic quotient_negative_latched;
|
||||
|
||||
// Reciprocal-division registers and combinational next values.
|
||||
logic [DIV_WIDTH-1:0] div_dividend;
|
||||
logic [DIV_WIDTH-1:0] div_quotient;
|
||||
logic [5:0] div_denominator;
|
||||
logic [RECIP_MAGIC_WIDTH-1:0] div_magic;
|
||||
logic [RECIP_PRODUCT_WIDTH-1:0] div_product;
|
||||
logic [5:0] divisor_small_next;
|
||||
logic [RECIP_MAGIC_WIDTH-1:0] reciprocal_magic_next;
|
||||
logic [RECIP_PRODUCT_WIDTH-1:0] div_dividend_product_ext;
|
||||
logic [RECIP_PRODUCT_WIDTH-1:0] div_magic_product_ext;
|
||||
logic [RECIP_PRODUCT_WIDTH-1:0] div_product_next;
|
||||
logic [DIV_WIDTH-1:0] recip_quotient_est;
|
||||
logic [RECIP_CHECK_WIDTH-1:0] recip_quotient_est_ext;
|
||||
logic [RECIP_CHECK_WIDTH-1:0] recip_divisor_ext;
|
||||
logic [RECIP_CHECK_WIDTH-1:0] recip_check_product;
|
||||
logic [RECIP_CHECK_WIDTH-1:0] recip_dividend_ext;
|
||||
logic [DIV_WIDTH-1:0] recip_quotient_est_latched;
|
||||
logic [RECIP_CHECK_WIDTH-1:0] recip_check_product_latched;
|
||||
logic [RECIP_CHECK_WIDTH-1:0] recip_dividend_ext_latched;
|
||||
logic [DIV_WIDTH-1:0] recip_quotient_corrected;
|
||||
|
||||
// Input arithmetic.
|
||||
logic signed [32:0] sample_ext;
|
||||
logic signed [32:0] Px_ext;
|
||||
logic signed [32:0] sample_minus_px;
|
||||
logic signed [32:0] oriented_error_next;
|
||||
logic signed [32:0] neg_oriented_error_next;
|
||||
logic quotient_negative_next;
|
||||
logic [DIV_WIDTH-1:0] division_numerator_next;
|
||||
logic signed [32:0] division_numerator_positive;
|
||||
logic signed [32:0] division_numerator_negative;
|
||||
// Result arithmetic.
|
||||
logic signed [32:0] quotient_signed;
|
||||
logic signed [32:0] raw_Errval;
|
||||
logic signed [32:0] range_ext;
|
||||
logic signed [32:0] range_midpoint_ext;
|
||||
logic signed [32:0] modulo_Errval_after_add;
|
||||
logic signed [32:0] modulo_Errval_after_add_latched;
|
||||
logic signed [32:0] modulo_Errval;
|
||||
logic signed [32:0] sign_restored_Errval;
|
||||
logic signed [32:0] modulo_Errval_latched;
|
||||
// Timing boundary between Annex A.5 modulo/sign restoration and
|
||||
// reconstructed-sample dequantization. This register remains as a visible
|
||||
// pipeline stage for the odd-scale carry-chain multiplier used below.
|
||||
logic signed [32:0] sign_restored_Errval_latched;
|
||||
logic signed [32:0] sign_restored_mul_latched;
|
||||
logic signed [6:0] near_scale_latched;
|
||||
logic signed [40:0] dequantized_error;
|
||||
logic signed [40:0] dequantized_error_latched;
|
||||
logic signed [40:0] reconstruction_base;
|
||||
logic signed [40:0] reconstruction_base_latched;
|
||||
logic signed [40:0] reconstruction_sum;
|
||||
logic signed [40:0] reconstruction_sum_latched;
|
||||
logic signed [40:0] range_scaled;
|
||||
logic signed [40:0] range_scaled_latched;
|
||||
logic signed [40:0] reconstruction_fixed;
|
||||
logic signed [40:0] reconstruction_fixed_latched;
|
||||
logic signed [40:0] maxval_ext;
|
||||
logic signed [40:0] maxval_ext_latched;
|
||||
logic signed [40:0] near_ext;
|
||||
logic signed [40:0] near_ext_latched;
|
||||
logic signed [40:0] maxval_plus_near_latched;
|
||||
logic signed [40:0] negative_near_latched;
|
||||
logic [PIX_WIDTH-1:0] reconstructed_next;
|
||||
logic [PIX_WIDTH-1:0] reconstructed_calc_latched;
|
||||
|
||||
// Handshake and acceptance terms.
|
||||
logic output_slot_open;
|
||||
logic accept_corrected;
|
||||
|
||||
// Shared odd-scale multipliers for Annex A.5 reconstruction terms.
|
||||
jls_near_scale_mul #(
|
||||
.INPUT_WIDTH(33),
|
||||
.OUTPUT_WIDTH(41)
|
||||
) regular_recon_err_mul_i (
|
||||
.multiplicand_i(sign_restored_mul_latched),
|
||||
.near_scale_i(near_scale_latched[5:0]),
|
||||
.product_o(dequantized_error)
|
||||
);
|
||||
|
||||
jls_near_scale_mul #(
|
||||
.INPUT_WIDTH(18),
|
||||
.OUTPUT_WIDTH(41)
|
||||
) regular_recon_range_mul_i (
|
||||
.multiplicand_i($signed({1'b0, RANGE_latched})),
|
||||
.near_scale_i(near_scale_latched[5:0]),
|
||||
.product_o(range_scaled)
|
||||
);
|
||||
|
||||
// Output-slot availability. The registered err_* output can be overwritten
// when it holds no pending event, or when err_ready is high in this cycle.
always_comb begin
  if (err_ready || !err_valid) begin
    output_slot_open = 1'b1;
  end else begin
    output_slot_open = 1'b0;
  end
end
|
||||
|
||||
// Input readiness. A new corrected-prediction event is accepted only while
// the state machine is idle.
//
// Timing note: readiness does not depend on err_ready. Several states run
// before STATE_FINISH, so a pending err_valid event may be accepted
// downstream while the next pixel is in flight. If it is still pending when
// STATE_FINISH is reached, the output_slot_open check in that state holds
// the new result back.
always_comb begin
  if (state == STATE_IDLE) begin
    corrected_ready = 1'b1;
  end else begin
    corrected_ready = 1'b0;
  end
end
|
||||
|
||||
// Input handshake. An event is captured in the cycle where this stage is
// ready and the upstream stage presents a valid event.
always_comb begin
  if (corrected_ready && corrected_valid) begin
    accept_corrected = 1'b1;
  end else begin
    accept_corrected = 1'b0;
  end
end
|
||||
|
||||
always_comb begin
|
||||
sample_ext = $signed({17'd0, sample_latched});
|
||||
Px_ext = $signed({17'd0, Px_latched});
|
||||
sample_minus_px = sample_ext - Px_ext;
|
||||
end
|
||||
|
||||
// Context sign orientation. The raw difference sample - Px is negated when
// the latched context sign flag is set; otherwise it passes through.
always_comb begin
  if (context_negative_latched) begin
    oriented_error_next = -sample_minus_px;
  end else begin
    oriented_error_next = sample_minus_px;
  end
end
|
||||
|
||||
always_comb begin
|
||||
neg_oriented_error_next = -oriented_error_latched;
|
||||
end
|
||||
|
||||
// Quotient sign and division numerator (|oriented error| + NEAR).
// Both candidate numerators are formed in parallel. A strictly positive
// oriented error selects the positive candidate and a non-negative quotient.
// Zero and negative errors select the negated candidate and flag the quotient
// as negative, so a zero error yields numerator NEAR with the flag set.
always_comb begin
  division_numerator_positive = oriented_error_latched + $signed({27'd0, NEAR_latched});
  division_numerator_negative = neg_oriented_error_next + $signed({27'd0, NEAR_latched});

  if (oriented_error_latched > 33'sd0) begin
    quotient_negative_next  = 1'b0;
    division_numerator_next = division_numerator_positive[DIV_WIDTH-1:0];
  end else begin
    quotient_negative_next  = 1'b1;
    division_numerator_next = division_numerator_negative[DIV_WIDTH-1:0];
  end
end
|
||||
|
||||
always_comb begin
|
||||
divisor_small_next = {NEAR_latched[4:0], 1'b1};
|
||||
end
|
||||
|
||||
// Reciprocal lookup table: ceil(2^24 / (2*NEAR+1)) for NEAR = 1..31.
// A later pipeline stage removes a possible +1 overshoot of the quotient
// estimate by testing q*d > n. NEAR = 0 falls into the default entry (0).
// Only NEAR_latched[4:0] is decoded here, so values of 32 and above would
// alias onto 0..31.
// NOTE(review): confirm that the upstream NEAR controller never exceeds 31.
always_comb begin
  case (NEAR_latched[4:0])
    5'd1:    reciprocal_magic_next = 23'd5592406;
    5'd2:    reciprocal_magic_next = 23'd3355444;
    5'd3:    reciprocal_magic_next = 23'd2396746;
    5'd4:    reciprocal_magic_next = 23'd1864136;
    5'd5:    reciprocal_magic_next = 23'd1525202;
    5'd6:    reciprocal_magic_next = 23'd1290556;
    5'd7:    reciprocal_magic_next = 23'd1118482;
    5'd8:    reciprocal_magic_next = 23'd986896;
    5'd9:    reciprocal_magic_next = 23'd883012;
    5'd10:   reciprocal_magic_next = 23'd798916;
    5'd11:   reciprocal_magic_next = 23'd729445;
    5'd12:   reciprocal_magic_next = 23'd671089;
    5'd13:   reciprocal_magic_next = 23'd621379;
    5'd14:   reciprocal_magic_next = 23'd578525;
    5'd15:   reciprocal_magic_next = 23'd541201;
    5'd16:   reciprocal_magic_next = 23'd508401;
    5'd17:   reciprocal_magic_next = 23'd479350;
    5'd18:   reciprocal_magic_next = 23'd453439;
    5'd19:   reciprocal_magic_next = 23'd430186;
    5'd20:   reciprocal_magic_next = 23'd409201;
    5'd21:   reciprocal_magic_next = 23'd390168;
    5'd22:   reciprocal_magic_next = 23'd372828;
    5'd23:   reciprocal_magic_next = 23'd356963;
    5'd24:   reciprocal_magic_next = 23'd342393;
    5'd25:   reciprocal_magic_next = 23'd328966;
    5'd26:   reciprocal_magic_next = 23'd316552;
    5'd27:   reciprocal_magic_next = 23'd305041;
    5'd28:   reciprocal_magic_next = 23'd294338;
    5'd29:   reciprocal_magic_next = 23'd284360;
    5'd30:   reciprocal_magic_next = 23'd275037;
    5'd31:   reciprocal_magic_next = 23'd266306;
    default: reciprocal_magic_next = 23'd0;
  endcase
end
|
||||
|
||||
always_comb begin
|
||||
div_dividend_product_ext = {{RECIP_MAGIC_WIDTH{1'b0}}, div_dividend};
|
||||
div_magic_product_ext = {{DIV_WIDTH{1'b0}}, div_magic};
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
div_product_next = div_dividend_product_ext * div_magic_product_ext;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
recip_quotient_est = div_product >> RECIP_SHIFT;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
recip_quotient_est_ext = {{6{1'b0}}, recip_quotient_est};
|
||||
recip_divisor_ext = {{DIV_WIDTH{1'b0}}, div_denominator};
|
||||
recip_dividend_ext = {{6{1'b0}}, div_dividend};
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
recip_check_product = recip_quotient_est_ext * recip_divisor_ext;
|
||||
end
|
||||
|
||||
// Quotient correction. If the registered check product (estimate * divisor)
// is larger than the registered dividend, the estimate overshot by one and is
// decremented; otherwise the estimate is already the exact quotient.
always_comb begin
  if (recip_check_product_latched > recip_dividend_ext_latched) begin
    recip_quotient_corrected = recip_quotient_est_latched - {{(DIV_WIDTH-1){1'b0}}, 1'b1};
  end else begin
    recip_quotient_corrected = recip_quotient_est_latched;
  end
end
|
||||
|
||||
// Re-apply the quotient sign recorded in STATE_NUMERATOR_PREP to the
// unsigned magnitude held in div_quotient.
always_comb begin
  if (quotient_negative_latched) begin
    quotient_signed = -$signed({16'd0, div_quotient});
  end else begin
    quotient_signed = $signed({16'd0, div_quotient});
  end
end
|
||||
|
||||
always_comb begin
|
||||
raw_Errval = quotient_signed;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
range_ext = $signed({16'd0, RANGE_latched});
|
||||
range_midpoint_ext = $signed({16'd0, ((RANGE_latched + 17'd1) >> 1)});
|
||||
end
|
||||
|
||||
// Modulo reduction, first step: a negative quantized error is raised by
// RANGE; non-negative values pass through unchanged.
always_comb begin
  if (raw_Errval < 33'sd0) begin
    modulo_Errval_after_add = raw_Errval + range_ext;
  end else begin
    modulo_Errval_after_add = raw_Errval;
  end
end
|
||||
|
||||
// Modulo reduction, second step: a registered value at or above the range
// midpoint is lowered by RANGE; smaller values pass through unchanged.
always_comb begin
  if (modulo_Errval_after_add_latched >= range_midpoint_ext) begin
    modulo_Errval = modulo_Errval_after_add_latched - range_ext;
  end else begin
    modulo_Errval = modulo_Errval_after_add_latched;
  end
end
|
||||
|
||||
// Sign restoration for the reconstruction path: the reduced error is negated
// again when the latched context sign flag is set.
always_comb begin
  if (context_negative_latched) begin
    sign_restored_Errval = -modulo_Errval_latched;
  end else begin
    sign_restored_Errval = modulo_Errval_latched;
  end
end
|
||||
|
||||
always_comb begin
|
||||
// Annex A.5 reconstruction base Px. The odd-scale multiplier products are
|
||||
// computed by the shared helpers above and registered in STATE_RECON_MUL.
|
||||
reconstruction_base = $signed({25'd0, Px_latched});
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
reconstruction_sum = reconstruction_base_latched + dequantized_error_latched;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
maxval_ext = (41'sd1 <<< PIX_WIDTH) - 41'sd1;
|
||||
near_ext = $signed({35'd0, NEAR_latched});
|
||||
end
|
||||
|
||||
// Range wrap of the registered reconstruction sum. Sums below the registered
// -NEAR threshold are raised by the registered scaled-RANGE term; sums above
// the registered MAXVAL+NEAR threshold are lowered by it; everything else is
// passed through.
always_comb begin
  if (reconstruction_sum_latched < negative_near_latched) begin
    reconstruction_fixed = reconstruction_sum_latched + range_scaled_latched;
  end else if (reconstruction_sum_latched > maxval_plus_near_latched) begin
    reconstruction_fixed = reconstruction_sum_latched - range_scaled_latched;
  end else begin
    reconstruction_fixed = reconstruction_sum_latched;
  end
end
|
||||
|
||||
// Final clamp of the wrapped reconstruction to the sample range: negative
// values saturate to all zeros, values above the registered MAXVAL saturate
// to all ones, and in-range values keep their low PIX_WIDTH bits.
always_comb begin
  if (reconstruction_fixed_latched < 41'sd0) begin
    reconstructed_next = {PIX_WIDTH{1'b0}};
  end else if (reconstruction_fixed_latched > maxval_ext_latched) begin
    reconstructed_next = {PIX_WIDTH{1'b1}};
  end else begin
    reconstructed_next = reconstruction_fixed_latched[PIX_WIDTH-1:0];
  end
end
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst) begin
|
||||
state <= STATE_IDLE;
|
||||
sample_latched <= {PIX_WIDTH{1'b0}};
|
||||
Px_latched <= {PIX_WIDTH{1'b0}};
|
||||
x_latched <= 13'd0;
|
||||
y_latched <= 13'd0;
|
||||
strip_first_latched <= 1'b0;
|
||||
strip_last_latched <= 1'b0;
|
||||
context_index_latched <= 9'd0;
|
||||
context_negative_latched <= 1'b0;
|
||||
run_mode_latched <= 1'b0;
|
||||
A_latched <= 32'd0;
|
||||
B_latched <= 32'sd0;
|
||||
C_latched <= 9'sd0;
|
||||
N_latched <= 16'd0;
|
||||
RANGE_latched <= 17'd0;
|
||||
qbpp_latched <= 5'd0;
|
||||
LIMIT_latched <= 7'd0;
|
||||
NEAR_latched <= 6'd0;
|
||||
oriented_error_latched <= 33'sd0;
|
||||
quotient_negative_latched <= 1'b0;
|
||||
div_dividend <= {DIV_WIDTH{1'b0}};
|
||||
div_quotient <= {DIV_WIDTH{1'b0}};
|
||||
div_denominator <= 6'd0;
|
||||
div_magic <= {RECIP_MAGIC_WIDTH{1'b0}};
|
||||
div_product <= {RECIP_PRODUCT_WIDTH{1'b0}};
|
||||
recip_quotient_est_latched <= {DIV_WIDTH{1'b0}};
|
||||
recip_check_product_latched <= {RECIP_CHECK_WIDTH{1'b0}};
|
||||
recip_dividend_ext_latched <= {RECIP_CHECK_WIDTH{1'b0}};
|
||||
modulo_Errval_after_add_latched <= 33'sd0;
|
||||
modulo_Errval_latched <= 33'sd0;
|
||||
sign_restored_Errval_latched <= 33'sd0;
|
||||
sign_restored_mul_latched <= 33'sd0;
|
||||
near_scale_latched <= 7'sd1;
|
||||
dequantized_error_latched <= 41'sd0;
|
||||
reconstruction_base_latched <= 41'sd0;
|
||||
reconstruction_sum_latched <= 41'sd0;
|
||||
range_scaled_latched <= 41'sd0;
|
||||
reconstruction_fixed_latched <= 41'sd0;
|
||||
maxval_ext_latched <= 41'sd0;
|
||||
near_ext_latched <= 41'sd0;
|
||||
maxval_plus_near_latched <= 41'sd0;
|
||||
negative_near_latched <= 41'sd0;
|
||||
reconstructed_calc_latched <= {PIX_WIDTH{1'b0}};
|
||||
err_valid <= 1'b0;
|
||||
Errval <= 32'sd0;
|
||||
reconstructed_sample <= {PIX_WIDTH{1'b0}};
|
||||
err_x <= 13'd0;
|
||||
err_y <= 13'd0;
|
||||
err_strip_first_pixel <= 1'b0;
|
||||
err_strip_last_pixel <= 1'b0;
|
||||
err_context_index <= 9'd0;
|
||||
err_context_negative <= 1'b0;
|
||||
err_run_mode_context <= 1'b0;
|
||||
err_qbpp <= 5'd0;
|
||||
err_LIMIT <= 7'd0;
|
||||
err_A <= 32'd0;
|
||||
err_B <= 32'sd0;
|
||||
err_C <= 9'sd0;
|
||||
err_N <= 16'd0;
|
||||
end else begin
|
||||
if (err_valid && err_ready) begin
|
||||
err_valid <= 1'b0;
|
||||
end
|
||||
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (accept_corrected) begin
|
||||
sample_latched <= corrected_sample;
|
||||
Px_latched <= corrected_Px;
|
||||
x_latched <= corrected_x;
|
||||
y_latched <= corrected_y;
|
||||
strip_first_latched <= corrected_strip_first_pixel;
|
||||
strip_last_latched <= corrected_strip_last_pixel;
|
||||
context_index_latched <= corrected_context_index;
|
||||
context_negative_latched <= corrected_context_negative;
|
||||
run_mode_latched <= corrected_run_mode_context;
|
||||
A_latched <= corrected_A;
|
||||
B_latched <= corrected_B;
|
||||
C_latched <= corrected_C;
|
||||
N_latched <= corrected_N;
|
||||
RANGE_latched <= RANGE;
|
||||
qbpp_latched <= qbpp;
|
||||
LIMIT_latched <= LIMIT;
|
||||
NEAR_latched <= NEAR;
|
||||
div_quotient <= {DIV_WIDTH{1'b0}};
|
||||
div_product <= {RECIP_PRODUCT_WIDTH{1'b0}};
|
||||
state <= STATE_INPUT_PREP;
|
||||
end
|
||||
end
|
||||
|
||||
STATE_INPUT_PREP: begin
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 Errval quantization
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Errval = Ix - Px, sign orientation, |Errval| + NEAR
|
||||
// Stage note : Corrected input fields were captured in STATE_IDLE.
|
||||
// This stage registers the oriented error ahead of the numerator add and reciprocal
|
||||
// DSP multiply, breaking corrected_sample/Px to div_product timing.
|
||||
oriented_error_latched <= oriented_error_next;
|
||||
state <= STATE_NUMERATOR_PREP;
|
||||
end
|
||||
|
||||
STATE_NUMERATOR_PREP: begin
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 Errval quantization
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : compute quotient sign and numerator for NEAR division
|
||||
// Stage note : Uses oriented_error_latched from STATE_INPUT_PREP so
|
||||
// the Ix-Px subtract/sign mux is separated from |Errval| + NEAR and
|
||||
// the DSP B-input register used by the reciprocal multiply.
|
||||
quotient_negative_latched <= quotient_negative_next;
|
||||
|
||||
if (NEAR_latched == 6'd0) begin
|
||||
div_quotient <= division_numerator_next;
|
||||
state <= STATE_ERRVAL_PREP;
|
||||
end else begin
|
||||
div_dividend <= division_numerator_next;
|
||||
div_denominator <= divisor_small_next;
|
||||
div_magic <= reciprocal_magic_next;
|
||||
state <= STATE_DIV_MUL;
|
||||
end
|
||||
end
|
||||
|
||||
STATE_DIV_MUL: begin
|
||||
// Stage note : Register the dividend * reciprocal product
// (div_product_next, formed from div_dividend and div_magic) so that
// STATE_DIV_CHECK derives the quotient estimate from a registered value.
div_product <= div_product_next;
|
||||
state <= STATE_DIV_CHECK;
|
||||
end
|
||||
|
||||
STATE_DIV_CHECK: begin
|
||||
// Stage note : Register q*d and dividend before the final quotient
|
||||
// correction. This keeps the DSP product output out of the carry
|
||||
// chain that subtracts one from the reciprocal quotient estimate.
|
||||
recip_quotient_est_latched <= recip_quotient_est;
|
||||
recip_check_product_latched <= recip_check_product;
|
||||
recip_dividend_ext_latched <= recip_dividend_ext;
|
||||
state <= STATE_DIV_CORRECT;
|
||||
end
|
||||
|
||||
STATE_DIV_CORRECT: begin
|
||||
// Stage note : Commit the final quotient. recip_quotient_corrected is the
// registered estimate, decremented by one when the registered q*d check
// product exceeded the registered dividend.
div_quotient <= recip_quotient_corrected;
|
||||
state <= STATE_ERRVAL_PREP;
|
||||
end
|
||||
|
||||
STATE_ERRVAL_PREP: begin
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 Errval quantization and modulo reduction
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Errval modulo normalization, first wrap step
|
||||
// Stage note : Capture Errval+RANGE before the midpoint compare so
|
||||
// div_quotient no longer feeds both carry chains in one cycle.
|
||||
modulo_Errval_after_add_latched <= modulo_Errval_after_add;
|
||||
state <= STATE_ERRVAL;
|
||||
end
|
||||
|
||||
STATE_ERRVAL: begin
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 Errval quantization and modulo reduction
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Errval modulo normalization, midpoint wrap step
|
||||
// Stage note : STATE_ERRVAL_PREP already registered the first modulo
|
||||
// add; this state now contains only the midpoint compare/subtract.
|
||||
modulo_Errval_latched <= modulo_Errval;
|
||||
state <= STATE_ERRVAL_SIGN;
|
||||
end
|
||||
|
||||
STATE_ERRVAL_SIGN: begin
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 Errval quantization and modulo reduction
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : restore Errval sign after modulo normalization
|
||||
// Stage note : modulo_Errval_latched separates the divider/modulo
|
||||
// carry chain from the context sign mux and reconstruction DSP input.
|
||||
sign_restored_Errval_latched <= sign_restored_Errval;
|
||||
state <= STATE_RECON_FACTORS;
|
||||
end
|
||||
|
||||
STATE_RECON_FACTORS: begin
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 reconstructed sample Rx
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : prepare Errval and (2*NEAR+1) for dequantization
|
||||
// Stage note : Explicit operand registers give the reconstruction
|
||||
// odd-scale multiplier a clean input boundary before Errval*(2*NEAR+1).
|
||||
sign_restored_mul_latched <= sign_restored_Errval_latched;
|
||||
near_scale_latched <= $signed({NEAR_latched, 1'b1});
|
||||
state <= STATE_RECON_MUL;
|
||||
end
|
||||
|
||||
STATE_RECON_MUL: begin
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 reconstructed sample Rx
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Errval * (2*NEAR+1) and RANGE * (2*NEAR+1)
|
||||
// Stage note : Register products before wrap/clamp to reduce the
|
||||
// NEAR-to-Rx combinational depth at the 250 MHz target.
|
||||
dequantized_error_latched <= dequantized_error;
|
||||
reconstruction_base_latched <= reconstruction_base;
|
||||
range_scaled_latched <= range_scaled;
|
||||
maxval_ext_latched <= maxval_ext;
|
||||
near_ext_latched <= near_ext;
|
||||
state <= STATE_RECON_SUM;
|
||||
end
|
||||
|
||||
STATE_RECON_SUM: begin
|
||||
// Stage note : Register the reconstruction sum and wrap thresholds
|
||||
// before Annex A.5 range wrapping. This splits maxval/near boundary
|
||||
// comparison from the add/subtract that forms reconstruction_fixed.
|
||||
reconstruction_sum_latched <= reconstruction_sum;
|
||||
maxval_plus_near_latched <= maxval_ext_latched + near_ext_latched;
|
||||
negative_near_latched <= -near_ext_latched;
|
||||
state <= STATE_RECON_CALC;
|
||||
end
|
||||
|
||||
STATE_RECON_CALC: begin
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 reconstructed sample Rx
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : wrap Px + Errval * (2*NEAR+1) into the extended range
|
||||
// Stage note : Register the JPEG-LS wrap result before the final
|
||||
// [0, MAXVAL] clamp, splitting the reported maxval-to-Rx path.
|
||||
reconstruction_fixed_latched <= reconstruction_fixed;
|
||||
state <= STATE_RECON_CLAMP;
|
||||
end
|
||||
|
||||
STATE_RECON_CLAMP: begin
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.5 reconstructed sample Rx
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Rx = clamp(wrapped reconstruction, 0, MAXVAL)
|
||||
// Stage note : The final sample clamp is isolated from the preceding
|
||||
// wrap add/subtract logic for 250 MHz timing closure.
|
||||
reconstructed_calc_latched <= reconstructed_next;
|
||||
state <= STATE_FINISH;
|
||||
end
|
||||
|
||||
STATE_FINISH: begin
|
||||
// Stage note : Publish the result only when the output register is free
// (output_slot_open). Otherwise remain in STATE_FINISH with every latched
// field unchanged until the pending err_valid event has been accepted.
// Errval is taken from modulo_Errval_latched, i.e. the value before the
// context sign is restored; the sign-restored copy feeds only the
// reconstructed-sample path inside this module.
if (output_slot_open) begin
|
||||
err_valid <= 1'b1;
|
||||
Errval <= modulo_Errval_latched[31:0];
|
||||
reconstructed_sample <= reconstructed_calc_latched;
|
||||
err_x <= x_latched;
|
||||
err_y <= y_latched;
|
||||
err_strip_first_pixel <= strip_first_latched;
|
||||
err_strip_last_pixel <= strip_last_latched;
|
||||
err_context_index <= context_index_latched;
|
||||
err_context_negative <= context_negative_latched;
|
||||
err_run_mode_context <= run_mode_latched;
|
||||
err_qbpp <= qbpp_latched;
|
||||
err_LIMIT <= LIMIT_latched;
|
||||
err_A <= A_latched;
|
||||
err_B <= B_latched;
|
||||
err_C <= C_latched;
|
||||
err_N <= N_latched;
|
||||
// Return to idle so the next corrected event can be accepted.
state <= STATE_IDLE;
|
||||
end
|
||||
end
|
||||
|
||||
default: begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
`default_nettype wire
|
||||
1476
fpga/verilog/jls_run_mode.sv
Normal file
1476
fpga/verilog/jls_run_mode.sv
Normal file
File diff suppressed because it is too large
Load Diff
364
fpga/verilog/jls_scan_ctrl.sv
Normal file
364
fpga/verilog/jls_scan_ctrl.sv
Normal file
@@ -0,0 +1,364 @@
|
||||
// Standard : ITU-T T.87 (06/1998) / ISO/IEC 14495-1 JPEG-LS Baseline
|
||||
// Clause : Annex A.8 control procedure, Annex D.1-D.3 scan control
|
||||
// Figure : N/A
|
||||
// Table : N/A
|
||||
// Pseudocode : Start one JPEG-LS scan per standalone strip frame
|
||||
// Trace : docs/jls_traceability.md#jls-scan-control
|
||||
// Example : The first pixel of each strip emits strip_start_valid.
|
||||
//
|
||||
// Scan controller for the strip-frame architecture. It converts pixel boundary
|
||||
// flags from jls_input_ctrl into strip start/finish commands and forwards the
|
||||
// pixel stream to the later predictor/context pipeline. A one-entry registered
|
||||
// slot breaks the input pixel_valid path away from downstream strip-start and
|
||||
// context ready/CE controls while still allowing one accepted pixel per cycle
|
||||
// when the slot drains and refills in the same cycle.
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module jls_scan_ctrl #(
|
||||
// Compile-time grayscale sample precision. Legal values: 8, 10, 12, 14, 16.
|
||||
parameter int PIX_WIDTH = 16,
|
||||
|
||||
// Number of original-image rows in one standalone JPEG-LS strip frame.
|
||||
parameter int SCAN_ROWS = 16
|
||||
) (
|
||||
// Main 250 MHz clock.
|
||||
input var logic clk,
|
||||
|
||||
// Synchronous active-high reset.
|
||||
input var logic rst,
|
||||
|
||||
// Pixel event from jls_input_ctrl is valid.
|
||||
input var logic pixel_valid,
|
||||
|
||||
// This controller accepted the current input pixel event.
|
||||
output logic pixel_ready,
|
||||
|
||||
// Grayscale input sample.
|
||||
input var logic [PIX_WIDTH-1:0] pixel_sample,
|
||||
|
||||
// Zero-based original-image column coordinate.
|
||||
input var logic [12:0] pixel_x,
|
||||
|
||||
// Zero-based original-image row coordinate.
|
||||
input var logic [12:0] pixel_y,
|
||||
|
||||
// First pixel of the current standalone strip frame.
|
||||
input var logic strip_first_pixel,
|
||||
|
||||
// Last pixel of the current standalone strip frame.
|
||||
input var logic strip_last_pixel,
|
||||
|
||||
// First pixel of the current original input image.
|
||||
input var logic image_first_pixel,
|
||||
|
||||
// Last pixel of the current original input image.
|
||||
input var logic image_last_pixel,
|
||||
|
||||
// Effective image width after runtime validation and fallback.
|
||||
input var logic [12:0] active_pic_col,
|
||||
|
||||
// Latched ratio for the current original image.
|
||||
input var logic [3:0] active_ratio,
|
||||
|
||||
// Dynamic NEAR value from jls_near_ctrl for non-first strips.
|
||||
input var logic [5:0] current_near,
|
||||
|
||||
// Pixel event forwarded to the predictor/context pipeline.
|
||||
output logic enc_pixel_valid,
|
||||
|
||||
// Downstream predictor/context pipeline can accept the forwarded pixel.
|
||||
input var logic enc_pixel_ready,
|
||||
|
||||
// Forwarded grayscale sample.
|
||||
output logic [PIX_WIDTH-1:0] enc_pixel_sample,
|
||||
|
||||
// Forwarded original-image column coordinate.
|
||||
output logic [12:0] enc_pixel_x,
|
||||
|
||||
// Forwarded original-image row coordinate.
|
||||
output logic [12:0] enc_pixel_y,
|
||||
|
||||
// Forwarded row-last flag, registered with enc_pixel_x/y. This is distinct
|
||||
// from enc_strip_last_pixel, which marks the last pixel of the whole strip.
|
||||
output logic enc_row_last_pixel,
|
||||
|
||||
// Forwarded first-pixel flag for strip-local boundary handling.
|
||||
output logic enc_strip_first_pixel,
|
||||
|
||||
// Forwarded last-pixel flag for strip-local flush handling.
|
||||
output logic enc_strip_last_pixel,
|
||||
|
||||
// Strip start command for jls_header_writer.
|
||||
output logic strip_start_valid,
|
||||
|
||||
// Header writer can accept a strip start command.
|
||||
input var logic strip_start_ready,
|
||||
|
||||
// Marks the first strip of an original input image.
|
||||
output logic original_image_first_strip,
|
||||
|
||||
// Strip frame width written to SOF55.X.
|
||||
output logic [12:0] strip_width,
|
||||
|
||||
// Strip frame height written to SOF55.Y.
|
||||
output logic [12:0] strip_height,
|
||||
|
||||
// NEAR value used by this strip frame.
|
||||
output logic [5:0] strip_near,
|
||||
|
||||
// Strip finish command after the last strip pixel enters the encode pipeline.
|
||||
output logic strip_finish_valid,
|
||||
|
||||
// Downstream finish handler can accept the strip finish command.
|
||||
input var logic strip_finish_ready,
|
||||
|
||||
// Marks the last strip of an original input image.
|
||||
output logic original_image_last_strip,
|
||||
|
||||
// Number of pixels in the completed strip frame.
|
||||
output logic [31:0] strip_pixel_count,
|
||||
|
||||
// Original-image start pulse for jls_near_ctrl.
|
||||
output logic near_image_start_valid,
|
||||
|
||||
// Ratio forwarded to jls_near_ctrl at original-image start.
|
||||
output logic [3:0] near_image_ratio
|
||||
);
|
||||
|
||||
// SCAN_ROWS expressed as a 13-bit value, matching the width of the
// strip_height port (SOF55.Y).
localparam logic [12:0] SCAN_ROWS_VALUE = SCAN_ROWS;

// Strip pixel counter: the registered running count and its combinational
// increment (running + 1).
logic [31:0] strip_pixel_count_running, strip_pixel_count_next;

// Single-entry holding slot that decouples the FIFO-facing input controller
// from the downstream JPEG-LS strip/encode pipeline. Every field is captured
// together when an input pixel is accepted.
logic                 slot_valid;
logic [PIX_WIDTH-1:0] slot_sample;
logic [12:0]          slot_x, slot_y;
logic                 slot_strip_first_pixel, slot_strip_last_pixel;
logic                 slot_row_last_pixel;
logic                 slot_image_first_pixel, slot_image_last_pixel;
logic [12:0]          slot_active_pic_col;
logic [5:0]           slot_strip_near;

// Readiness terms. The input_* signals qualify the pixel currently offered
// on the input port; the unprefixed ones qualify the pixel held in the slot.
logic        input_start_path_ready, input_finish_path_ready;
logic        input_boundary_ready;
logic        start_path_ready, finish_path_ready, all_paths_ready;
logic        slot_open_for_input;
logic        accepted_input;
logic        forward_slot;
logic [12:0] input_row_last_col;
logic        input_row_last_pixel;

// NEAR value captured into the slot. It is forced to zero on the first pixel
// of an image, even if jls_near_ctrl has not yet reset on that same cycle.
logic [5:0] selected_strip_near;
|
||||
|
||||
always_comb begin
  // Start-path readiness for the pixel offered on the input port: low only
  // when that pixel is a strip-first pixel and strip_start_ready is low.
  if (strip_first_pixel && !strip_start_ready) begin
    input_start_path_ready = 1'b0;
  end else begin
    input_start_path_ready = 1'b1;
  end
end
|
||||
|
||||
always_comb begin
  // Finish-path readiness for the pixel offered on the input port: low only
  // when that pixel is a strip-last pixel and strip_finish_ready is low.
  if (strip_last_pixel && !strip_finish_ready) begin
    input_finish_path_ready = 1'b0;
  end else begin
    input_finish_path_ready = 1'b1;
  end
end
|
||||
|
||||
always_comb begin
  // The offered pixel clears the strip-boundary check only when both the
  // start-path and finish-path terms allow it.
  if (input_start_path_ready && input_finish_path_ready) begin
    input_boundary_ready = 1'b1;
  end else begin
    input_boundary_ready = 1'b0;
  end
end
|
||||
|
||||
always_comb begin
  // Start-path readiness for the pixel held in the slot: low only when the
  // slot holds a strip-first pixel and strip_start_ready is low.
  if (slot_strip_first_pixel && !strip_start_ready) begin
    start_path_ready = 1'b0;
  end else begin
    start_path_ready = 1'b1;
  end
end
|
||||
|
||||
always_comb begin
  // Finish-path readiness for the pixel held in the slot: low only when the
  // slot holds a strip-last pixel and strip_finish_ready is low.
  if (slot_strip_last_pixel && !strip_finish_ready) begin
    finish_path_ready = 1'b0;
  end else begin
    finish_path_ready = 1'b1;
  end
end
|
||||
|
||||
always_comb begin
  // Join of the three downstream paths the slot pixel must clear: strip
  // start, strip finish, and the encode pipeline.
  if (start_path_ready && finish_path_ready && enc_pixel_ready) begin
    all_paths_ready = 1'b1;
  end else begin
    all_paths_ready = 1'b0;
  end
end
|
||||
|
||||
always_comb begin
  // The slot pixel moves downstream in any cycle where the slot is occupied
  // and every downstream path is ready.
  if (slot_valid && all_paths_ready) begin
    forward_slot = 1'b1;
  end else begin
    forward_slot = 1'b0;
  end
end
|
||||
|
||||
always_comb begin
  // The slot can take a new pixel when it is empty, or when its current
  // pixel is being forwarded in this same cycle.
  if (!slot_valid || forward_slot) begin
    slot_open_for_input = 1'b1;
  end else begin
    slot_open_for_input = 1'b0;
  end
end
|
||||
|
||||
always_comb begin
  // Upstream ready: the offered pixel must clear the strip-boundary check
  // and the slot must have room for it.
  if (input_boundary_ready && slot_open_for_input) begin
    pixel_ready = 1'b1;
  end else begin
    pixel_ready = 1'b0;
  end
end
|
||||
|
||||
always_comb begin
  // Input handshake completes when valid and ready are both high.
  if (pixel_valid && pixel_ready) begin
    accepted_input = 1'b1;
  end else begin
    accepted_input = 1'b0;
  end
end
|
||||
|
||||
always_comb begin
  // The slot pixel is offered to the encode pipeline only while the strip
  // start and finish paths are both ready. enc_pixel_ready is deliberately
  // not part of this term.
  if (slot_valid && start_path_ready && finish_path_ready) begin
    enc_pixel_valid = 1'b1;
  end else begin
    enc_pixel_valid = 1'b0;
  end
end
|
||||
|
||||
always_comb begin
  // The encode-pipeline payload is driven directly from the slot registers.

  // Boundary flags.
  enc_strip_first_pixel = slot_strip_first_pixel;
  enc_strip_last_pixel  = slot_strip_last_pixel;
  enc_row_last_pixel    = slot_row_last_pixel;

  // Coordinates and sample value.
  enc_pixel_x      = slot_x;
  enc_pixel_y      = slot_y;
  enc_pixel_sample = slot_sample;
end
|
||||
|
||||
always_comb begin
  // Issue the strip start command while the slot holds a strip-first pixel
  // and the other two paths (finish, encode) are ready. strip_start_ready is
  // not part of this term.
  if (slot_valid && slot_strip_first_pixel && finish_path_ready && enc_pixel_ready) begin
    strip_start_valid = 1'b1;
  end else begin
    strip_start_valid = 1'b0;
  end
end
|
||||
|
||||
always_comb begin
  // Issue the strip finish command while the slot holds a strip-last pixel
  // and the other two paths (start, encode) are ready. strip_finish_ready is
  // not part of this term.
  if (slot_valid && slot_strip_last_pixel && start_path_ready && enc_pixel_ready) begin
    strip_finish_valid = 1'b1;
  end else begin
    strip_finish_valid = 1'b0;
  end
end
|
||||
|
||||
always_comb begin
  // Strip frame geometry: width comes from the value captured in the slot,
  // height is the constant SCAN_ROWS_VALUE.
  strip_width  = slot_active_pic_col;
  strip_height = SCAN_ROWS_VALUE;

  // First/last strip markers are driven from the image-first and image-last
  // pixel flags captured in the slot.
  original_image_first_strip = slot_image_first_pixel;
  original_image_last_strip  = slot_image_last_pixel;
end
|
||||
|
||||
always_comb begin
  // NEAR to capture with the offered pixel: zero on the first pixel of an
  // image, otherwise the current_near input.
  if (image_first_pixel) begin
    selected_strip_near = 6'd0;
  end else begin
    selected_strip_near = current_near;
  end
end
|
||||
|
||||
// Column index of the final pixel in a row: active width minus one. The
// subtraction is 13-bit, so it wraps if active_pic_col is zero.
always_comb input_row_last_col = active_pic_col - 13'd1;
|
||||
|
||||
always_comb begin
  // The offered pixel is row-last when its column equals the last column
  // index.
  if (pixel_x == input_row_last_col) begin
    input_row_last_pixel = 1'b1;
  end else begin
    input_row_last_pixel = 1'b0;
  end
end
|
||||
|
||||
// NEAR output: the value captured in the slot with its pixel.
always_comb strip_near = slot_strip_near;
|
||||
|
||||
// Running strip pixel count plus one.
always_comb strip_pixel_count_next = strip_pixel_count_running + 32'd1;
|
||||
|
||||
always_comb begin
  // The count is reported only while the slot holds a strip-last pixel:
  // running count plus one, so the last pixel itself is included. It reads
  // zero at all other times.
  if (slot_strip_last_pixel) begin
    strip_pixel_count = strip_pixel_count_next;
  end else begin
    strip_pixel_count = 32'd0;
  end
end
|
||||
|
||||
always_comb begin
  // Image-start pulse: high in the cycle where an image-first pixel is
  // accepted at the input.
  if (accepted_input && image_first_pixel) begin
    near_image_start_valid = 1'b1;
  end else begin
    near_image_start_valid = 1'b0;
  end
end
|
||||
|
||||
// The ratio output is a combinational pass-through of active_ratio.
always_comb near_image_ratio = active_ratio;
|
||||
|
||||
always_ff @(posedge clk) begin
  if (rst) begin
    // Synchronous reset: empty the slot, clear every captured field, and
    // zero the strip pixel counter.
    strip_pixel_count_running <= '0;

    slot_valid  <= 1'b0;
    slot_sample <= '0;
    slot_x      <= '0;
    slot_y      <= '0;

    slot_strip_first_pixel <= 1'b0;
    slot_strip_last_pixel  <= 1'b0;
    slot_row_last_pixel    <= 1'b0;
    slot_image_first_pixel <= 1'b0;
    slot_image_last_pixel  <= 1'b0;

    slot_active_pic_col <= '0;
    slot_strip_near     <= '0;
  end else begin
    // Slot register. A newly accepted pixel loads the slot, and this takes
    // priority so that a simultaneous forward becomes a replace. A forward
    // with no new pixel empties the slot; only slot_valid is cleared and the
    // payload fields keep their old values.
    if (accepted_input) begin
      slot_valid  <= 1'b1;
      slot_sample <= pixel_sample;
      slot_x      <= pixel_x;
      slot_y      <= pixel_y;

      slot_strip_first_pixel <= strip_first_pixel;
      slot_strip_last_pixel  <= strip_last_pixel;
      slot_row_last_pixel    <= input_row_last_pixel;
      slot_image_first_pixel <= image_first_pixel;
      slot_image_last_pixel  <= image_last_pixel;

      slot_active_pic_col <= active_pic_col;
      slot_strip_near     <= selected_strip_near;
    end else if (forward_slot) begin
      slot_valid <= 1'b0;
    end

    // Strip pixel counter, updated only when the slot pixel is forwarded.
    // Priority: a strip-last pixel clears the count, a strip-first pixel
    // restarts it at one, and any other pixel increments it.
    if (forward_slot) begin
      if (slot_strip_last_pixel) begin
        strip_pixel_count_running <= '0;
      end else if (slot_strip_first_pixel) begin
        strip_pixel_count_running <= 32'd1;
      end else begin
        strip_pixel_count_running <= strip_pixel_count_next;
      end
    end
  end
end
|
||||
|
||||
endmodule
|
||||
|
||||
`default_nettype wire
|
||||
1682
fpga/verilog/jpeg_ls_encoder_top.sv
Normal file
1682
fpga/verilog/jpeg_ls_encoder_top.sv
Normal file
File diff suppressed because it is too large
Load Diff
24
fpga/verilog/jpeg_ls_rtl.f
Normal file
24
fpga/verilog/jpeg_ls_rtl.f
Normal file
@@ -0,0 +1,24 @@
|
||||
fpga/verilog/jls_common_pkg.sv
|
||||
fpga/verilog/jls_preset_defaults.sv
|
||||
fpga/verilog/jls_coding_params.sv
|
||||
fpga/verilog/jls_input_ctrl.sv
|
||||
fpga/verilog/jls_scan_ctrl.sv
|
||||
fpga/verilog/jls_neighbor_provider.sv
|
||||
fpga/verilog/jls_mode_router.sv
|
||||
fpga/verilog/jls_predictor.sv
|
||||
fpga/verilog/jls_context_quantizer.sv
|
||||
fpga/verilog/jls_context_model.sv
|
||||
fpga/verilog/jls_prediction_corrector.sv
|
||||
fpga/verilog/jls_near_scale_mul.sv
|
||||
fpga/verilog/jls_regular_error_quantizer.sv
|
||||
fpga/verilog/jls_header_writer.sv
|
||||
fpga/verilog/jls_near_ctrl.sv
|
||||
fpga/verilog/jls_context_memory.sv
|
||||
fpga/verilog/jls_context_update.sv
|
||||
fpga/verilog/jls_error_mapper.sv
|
||||
fpga/verilog/jls_run_mode.sv
|
||||
fpga/verilog/jls_golomb_encoder.sv
|
||||
fpga/verilog/jls_bit_packer.sv
|
||||
fpga/verilog/jls_byte_arbiter.sv
|
||||
fpga/verilog/jls_output_buffer.sv
|
||||
fpga/verilog/jpeg_ls_encoder_top.sv
|
||||
Reference in New Issue
Block a user