仿真平台内核初版 -tlib库包含<sparc arm riscv powerPC>

2026-02-07 20:43:43 +08:00
parent de61f9e2b0
commit b3117648be
9748 changed files with 4309137 additions and 0 deletions
--- a/tools/csv2resd/README.md
+++ b/tools/csv2resd/README.md
@@ -0,0 +1,52 @@
+# csv-to-resd
+
+This directory contains the CSV2RESD tool, which converts CSV files to the RESD (**RE**node **S**ensor **D**ata) file format.
+
+## Usage
+
+### Syntax
+`./csv2resd.py [GROUP1] [GROUP2] [GROUP3] ... <output-file>`
+
+`GROUP ::= --input <csv-file> [--offset <offset>] [--count <count>] [--map <type>:<field(s)>:<target(s)>*:<channel>*] [--start-time <start-time>] [--frequency <frequency>] [--timestamp <timestamp>]`
+
+The syntax allows multiple groups to be specified; each `--input` starts a new group, and each group needs at least one of `--frequency` and `--timestamp`.
+For each `--input`, multiple mappings (`--map`) can be specified. The `*` in the `--map` syntax marks a property as optional:
+`--map <type>:<field(s)>`, `--map <type>:<field(s)>:<target(s)>`, `--map <type>:<field(s)>:<target(s)>:<channel>` and `--map <type>:<field>::<channel>` are all correct mappings.
+
+For more information, refer to `--help`.
+
+### Example
+
+`./csv2resd.py --input first.csv --map temperature:temp1::0 --map temperature:temp2::1 --start-time 0 --frequency 1 --input second.csv --map temperature:temp::2 --start-time 0 --frequency 1 output.resd`
+
+**first.csv**
+```
+temp1,temp2
+32502,32003
+32638,31603
+32633,31565
+33060,31975
+31617,32368
+32912,31284
+31813,31915
+31999,31961
+31811,32049
+31427,32409
+```
+
+**second.csv**
+```
+temp
+32139
+32253
+32402
+32004
+32037
+32698
+31687
+32658
+32452
+32300
+```
+
+The above example extracts the `temp1` and `temp2` columns from `first.csv` and the `temp` column from `second.csv`, and maps them to temperature channels `0`, `1` and `2` in RESD, respectively.
--- a/tools/csv2resd/csv2resd.py
+++ b/tools/csv2resd/csv2resd.py
@@ -0,0 +1,240 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2010-2025 Antmicro
+#
+# This file is licensed under the MIT License.
+# Full license text is available in 'licenses/MIT.txt'.
+#
+
+import argparse
+import sys
+import string
+from dataclasses import dataclass
+from typing import List, Optional
+import csv
+import resd
+
+from grammar import SAMPLE_TYPE, BLOCK_TYPE
+
+
+@dataclass
+class Mapping:
+    sample_type: SAMPLE_TYPE
+    map_from: List[str]
+    map_to: Optional[List[str]]
+    channel: int
+
+    def remap(self, row):
+        output = [self._retype(row[key]) for key in self.map_from]
+        if self.map_to:
+            output = dict(zip(self.map_to, output))
+        if isinstance(output, list) and len(output) == 1:
+            output = output[0]
+        return output
+
+    def _retype(self, value):
+        try:
+            if all(c.isdigit() for c in value.lstrip('-')):
+                return int(value)
+            elif all(c.isdigit() or c == '.' for c in value.lstrip('-')):
+                return float(value)
+            elif value[0] == '"' and value[-1] == '"':
+                return value[1:-1]
+            elif value[0] == '#' and all(c in string.hexdigits for c in value[1:]):
+                return bytes.fromhex(value[1:])
+        except ValueError:
+            return value
+
+
+def parse_mapping(mapping):
+    chunks = mapping.split(':')
+
+    if len(chunks) >= 3 and not chunks[2]:
+        chunks[2] = '_'
+
+    if not all(chunks) or (len(chunks) < 2 or len(chunks) > 4):
+        print(f'{mapping} is invalid mapping')
+        return None
+
+    possible_types = [type_ for type_ in SAMPLE_TYPE.encmapping if chunks[0].lower() in type_.lower()]
+    if not possible_types:
+        print(f'Invalid type: {chunks[0]}')
+        print(f'Possible types: {", ".join(SAMPLE_TYPE.ksymapping.values())}')
+        return None
+
+    if len(possible_types) > 1:
+        print(f'More than one type matches: {", ".join(type_ for _, type_ in possible_types)}')
+        return None
+
+    type_ = possible_types[0]
+    map_from = chunks[1].split(',')
+    map_to = chunks[2].split(',') if len(chunks) >= 3 and chunks[2] != '_' else None
+    channel = int(chunks[3]) if len(chunks) >= 4 else 0
+
+    return type_, map_from, map_to, channel
+
+
+def parse_arguments():
+    arguments = sys.argv[1:]
+
+    entry_parser = argparse.ArgumentParser()
+    entry_parser.add_argument('-i', '--input', required=True, help='path to csv file')
+    entry_parser.add_argument('-m', '--map', action='append', type=parse_mapping,
+        help='mapping in format <type>:<index/label>[:<to_property>:<channel>], multiple mappings are possible')
+    entry_parser.add_argument('-s', '--start-time', type=int, help='start time (in nanoseconds)')
+    entry_parser.add_argument('-f', '--frequency', type=float, help='frequency of the data (in Hz)')
+    entry_parser.add_argument('-t', '--timestamp', help='index/label of a column in the csv file for the timestamps (in nanoseconds)')
+    entry_parser.add_argument('-o', '--offset', type=int, default=0, help='number of samples to skip from the beginning of the file')
+    entry_parser.add_argument('-c', '--count', type=int, default=sys.maxsize, help='number of samples to parse')
+    entry_parser.add_argument('output', nargs='?', help='output file path')
+
+    if not arguments or any(v in ('-h', '--help') for v in arguments):
+        entry_parser.parse_args(['--help'])
+        sys.exit(0)
+
+    split_indices = [i for i, v in enumerate(arguments) if v in ('-i', '--input')]
+    split_indices.append(len(arguments))
+    subentries = [arguments[a:b] for a, b in zip(split_indices, split_indices[1:])]
+
+    entries = []
+    for subentry in subentries:
+        parsed = entry_parser.parse_args(subentry)
+        if parsed.frequency is None and parsed.timestamp is None:
+            print(f'{parsed.input}: either frequency or timestamp should be provided')
+            sys.exit(1)
+        if parsed.frequency and parsed.timestamp:
+            print(f'Data will be resampled to {parsed.frequency}Hz based on provided timestamps')
+
+        entries.append(parsed)
+
+    if entries and entries[-1].output is None:
+        entry_parser.parse_args(['--help'])
+        sys.exit(1)
+
+    return entries
+
+
+def map_source(labels, source):
+    if source is None:
+        return None
+
+    source = int(source) if all(c.isdigit() for c in source) else source
+    if isinstance(source, int) and 0 <= source < len(labels):
+        source = labels[source]
+
+    if source not in labels:
+        print(f'{source} is invalid source')
+        return None
+
+    return source
+
+
+def rebuild_mapping(labels, mapping):
+    map_from = mapping[1]
+
+    for i, src in enumerate(map_from):
+        src = map_source(labels, src)
+        if src is None:
+            return None
+        map_from[i] = src
+
+    return Mapping(mapping[0], map_from, mapping[2], mapping[3])
+
+
+if __name__ == '__main__':
+    arguments = parse_arguments()
+    output_file = arguments[-1].output
+
+    resd_file = resd.RESD(output_file)
+    for group in arguments:
+        block_type = BLOCK_TYPE.ARBITRARY_TIMESTAMP
+        resampling_mode = False
+        if group.frequency is not None:
+            block_type = BLOCK_TYPE.CONSTANT_FREQUENCY
+            if group.timestamp is not None:
+                # In resampling mode we use provided timestamps to generate constant frequency sample blocks.
+                # It allows to reconstruct RESD stream spanning long time periods from the sparse data.
+                # The idea is based on the default behavior of RESD, that allows for gaps between RESD blocks.
+                # On the other side, constant frequency sample blocks contain continuous, densely packed data,
+                # so we split samples into separate groups that are used to generate separate blocks.
+                # It is based on a simple heuristic:
+                # Samples with the same timestamps are grouped together and resampled to the frequency passed from the command line.
+                # Start time of the generated block is calculated as an offset to the previous timestamp + the initial start-time passed from the command line.
+                # Therefore for sparse data you often end up with the RESD file that consists of multiple blocks made of just one sample.
+                # Start time of the block calculated from the provided timestamps is crucial,
+                # because it translates to the virtual time during emulation, when the first sample from the block appears.
+                # Gaps can be handled directly in the model using RESD APIs.
+                # Usual behavior is to provide a default sample or repeat the last sample in the place of gaps.
+                # If your CSV file contains well spaced samples, it is better to not provide timestamps explicitly
+                # and generate a single block containing all samples.
+                resampling_mode = True
+
+        with open(group.input, 'rt') as csv_file:
+            csv_reader = csv.DictReader(csv_file)
+            labels = mapping = None
+            timestamp_source = None
+
+            to_skip = group.offset
+            to_parse = group.count
+
+            # These fields are used only in resampling mode to keep track of the block's start time.
+            # In resampling mode, data is automatically split into multiple blocks based on the timestamps.
+            prev_timestamp = None
+            start_offset = group.start_time
+
+            for row in csv_reader:
+                if labels is None:
+                    labels = list(row.keys())
+                    mappings = [rebuild_mapping(labels, mapping) for mapping in group.map]
+                    if block_type == BLOCK_TYPE.ARBITRARY_TIMESTAMP or resampling_mode:
+                        timestamp_source = map_source(labels, group.timestamp)
+                        if timestamp_source is None:
+                            sys.exit(1)
+
+                if to_skip > 0:
+                    to_skip -= 1
+                    continue
+
+                if to_parse == 0:
+                    break
+
+                for mapping in mappings:
+                    block = resd_file.get_block_or_create(mapping.sample_type, block_type, mapping.channel)
+                    if block_type == BLOCK_TYPE.CONSTANT_FREQUENCY:
+                        if resampling_mode:
+                            current_sample = mapping.remap(row)
+                            current_timestamp = int(row[timestamp_source])
+
+                            if prev_timestamp is None:
+                                # First block
+                                prev_timestamp = current_timestamp
+                                block.frequency = group.frequency
+                                block.start_time = start_offset
+
+                            if current_timestamp != prev_timestamp:
+                                resd_file.flush()
+                                block = resd_file.get_block_or_create(mapping.sample_type, block_type, mapping.channel)
+                                block.frequency = group.frequency
+                                start_offset += (current_timestamp - prev_timestamp) # Gap between blocks
+                                block.start_time = start_offset
+
+                            block.add_sample(current_sample)
+                            prev_timestamp = current_timestamp
+                        else:
+                            block.add_sample(mapping.remap(row))
+                    else:
+                        block.add_sample(mapping.remap(row), int(row[timestamp_source]))
+
+                to_parse -= 1
+
+        # In resampling mode, multiple blocks are usually generated from the single input
+        # so block properties are tracked ad hoc.
+        if not resampling_mode:
+            for mapping in mappings:
+                block = resd_file.get_block(mapping.sample_type, mapping.channel)
+                if block_type == BLOCK_TYPE.CONSTANT_FREQUENCY:
+                    block.frequency = group.frequency
+                if group.start_time is not None:
+                    block.start_time = group.start_time
+
+        resd_file.flush()
--- a/tools/csv2resd/grammar.py
+++ b/tools/csv2resd/grammar.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+
+from construct import *
+
+# Kind of a data block; stored as an unsigned 8-bit integer.
+# The kind selects the block subheader and the per-sample layout (see the Switch tables keyed by these names).
+BLOCK_TYPE = Enum(Int8ul,
+    RESERVED            = 0x00,
+    ARBITRARY_TIMESTAMP = 0x01,
+    CONSTANT_FREQUENCY  = 0x02,
+)
+
+# Kind of samples carried by a data block; stored as an unsigned 16-bit little-endian integer.
+SAMPLE_TYPE = Enum(Int16ul,
+    RESERVED              = 0x0000,
+    TEMPERATURE           = 0x0001,
+    ACCELERATION          = 0x0002,
+    ANGULAR_RATE          = 0x0003,
+    VOLTAGE               = 0x0004,
+    ECG                   = 0x0005,
+    HUMIDITY              = 0x0006,
+    PRESSURE              = 0x0007,
+    MAGNETIC_FLUX_DENSITY = 0x0008,
+    BINARY_DATA           = 0x0009,
+
+    # NOTE(review): presumably the start of a user-defined range - confirm against the RESD specification.
+    CUSTOM                = 0xF000,
+)
+
+# File header: the literal magic bytes "RESD", a one-byte version and three padding bytes (8 bytes in total).
+resd_header = Struct(
+    "magic" / Const(b"RESD"),
+    "version" / Int8ul,
+    "reserved" / Padding(3)
+)
+
+# Length-prefixed byte array; when building, "size" is recomputed from the length of "data".
+blob = Struct(
+    "size" / Rebuild(Int32ul, len_(this.data)),
+    "data" / Int8ul[this.size],
+)
+
+# One metadata entry: a null-terminated key (a sequence of byte values), a one-byte type tag
+# and a value whose encoding is selected by that tag.
+data_block_metadata_item = Struct(
+    "key" / NullTerminated(GreedyRange(Int8ub)),
+    "type" / Int8ul,
+    "value" / Switch(this.type,
+    {
+        0x01: Int8sl,
+        0x02: Int8ul,
+        0x03: Int16sl,
+        0x04: Int16ul,
+        0x05: Int32sl,
+        0x06: Int32ul,
+        0x07: Int64sl,
+        0x08: Int64ul,
+        0x09: Float32l,
+        0x0A: Float64l,
+        0x0B: NullTerminated(GreedyRange(Int8ul)),  # null-terminated sequence of byte values
+        0x0C: blob,
+    }),
+)
+
+# Metadata section of a block: "size" is the byte length of the items that follow
+# (it does not include the 8 bytes of the size field itself).
+data_block_metadata = Struct(
+    "size" / Int64ul,
+    "items" / FixedSized(this.size, GreedyRange(data_block_metadata_item)),
+)
+
+# Payload layout of a single sample, selected by the sample type name.
+# RESERVED and CUSTOM have no entry in this table.
+data_block_sample = lambda sample_type: Switch(sample_type, {
+    "TEMPERATURE": Int32sl,
+    "ACCELERATION": Struct(
+        "x" / Int32sl,
+        "y" / Int32sl,
+        "z" / Int32sl,
+    ),
+    "ANGULAR_RATE": Struct(
+        "x" / Int32sl,
+        "y" / Int32sl,
+        "z" / Int32sl,
+    ),
+    "VOLTAGE": Int32ul,
+    "ECG": Int32sl,
+    "HUMIDITY": Int32ul,
+    "PRESSURE": Int64ul,
+    "MAGNETIC_FLUX_DENSITY": Struct(
+        "x" / Int32sl,
+        "y" / Int32sl,
+        "z" / Int32sl,
+    ),
+    "BINARY_DATA": blob,
+})
+
+# Sample of an ARBITRARY_TIMESTAMP block: every sample is preceded by its own 64-bit timestamp.
+data_block_sample_arbitrary = lambda sample_type: Struct(
+    "timestamp" / Int64ul,
+    "sample" / data_block_sample(sample_type)
+)
+
+# Subheader of an ARBITRARY_TIMESTAMP block.
+data_block_sample_arbitrary_subheader = Struct(
+    "start_time" / Int64ul,
+)
+
+# Sample of a CONSTANT_FREQUENCY block: the payload only, no per-sample timestamp is stored.
+data_block_sample_frequency = lambda sample_type: Struct(
+    "sample" / data_block_sample(sample_type)
+)
+
+# Subheader of a CONSTANT_FREQUENCY block.
+data_block_sample_frequency_subheader = Struct(
+    "start_time" / Int64ul,
+    "period" / Int64ul,
+)
+
+# Sample layout for the given block type (type_) and sample type.
+data_block_sample_single = lambda type_, sample_type: Switch(type_, {
+    "ARBITRARY_TIMESTAMP": data_block_sample_arbitrary(sample_type),
+    "CONSTANT_FREQUENCY": data_block_sample_frequency(sample_type),
+})
+
+# Subheader layout, selected by the block type stored in the enclosing block's header.
+data_block_subheader = Switch(this.header.block_type, {
+    "ARBITRARY_TIMESTAMP": data_block_sample_arbitrary_subheader,
+    "CONSTANT_FREQUENCY": data_block_sample_frequency_subheader
+})
+
+# Header common to every data block.
+data_block_header = Struct(
+    "block_type" / BLOCK_TYPE,
+    "sample_type" / SAMPLE_TYPE,
+    "channel_id" / Int16ul,
+    "data_size" / Int64ul,
+)
+
+# A complete data block: header, type-specific subheader, metadata and the samples.
+# The sample type is referenced through `this._` because it is evaluated inside the per-sample Struct,
+# one context level below the block.
+# NOTE(review): "samples" is a GreedyRange that is not bounded by header.data_size - confirm that this
+# grammar is only used for building, or that parsing multi-block files behaves as intended.
+data_block = Struct(
+    "header" / data_block_header,
+    "subheader" / data_block_subheader,
+    "metadata" / data_block_metadata,
+    "samples" / GreedyRange(data_block_sample_single(this.header.block_type, this._.header.sample_type))
+)
+
+# Whole RESD file: the file header followed by any number of data blocks.
+resd = Struct(
+    "header" / resd_header,
+    "blocks" / GreedyRange(data_block)
+)
--- a/tools/csv2resd/requirements.txt
+++ b/tools/csv2resd/requirements.txt
@@ -0,0 +1 @@
+construct==2.10.68
--- a/tools/csv2resd/resd.py
+++ b/tools/csv2resd/resd.py
@@ -0,0 +1,224 @@
+#!/usr/bin/env python
+
+from grammar import resd_header, data_block, data_block_sample_frequency, data_block_sample_arbitrary, data_block_header, data_block_subheader, data_block_metadata_item, BLOCK_TYPE, SAMPLE_TYPE
+
+__VERSION__ = 1
+
+
+class RESD:
+    def __init__(self, file_path):
+        self.file_handle = open(file_path, 'wb')
+        self.blocks = {}
+        self._write_header()
+
+    def __del__(self):
+        self.flush()
+        self.file_handle.close()
+
+    def new_block(self, sample_type, block_type, channel_id=0):
+        previous_block = self.get_block(sample_type, channel_id)
+        if previous_block is not None:
+            self.flush(sample_type, channel_id)
+
+        block = ({
+            BLOCK_TYPE.CONSTANT_FREQUENCY: RESDBlockConstantFrequency,
+            BLOCK_TYPE.ARBITRARY_TIMESTAMP: RESDBlockArbitraryTimestamp
+        })[block_type](sample_type, block_type, channel_id)
+
+        self.blocks[(sample_type, channel_id)] = block
+        return block
+
+    def get_block(self, sample_type, channel_id=0):
+        return self.blocks.get((sample_type, channel_id), None)
+
+    def get_block_or_create(self, sample_type, block_type, channel_id=0):
+        block = self.get_block(sample_type, channel_id)
+        return block if block else self.new_block(sample_type, block_type, channel_id)
+
+    def flush(self, sample_type=None, channel_id=None):
+        for key in list(self.blocks.keys()):
+            block_sample_type, block_channel_id = key
+
+            if sample_type and block_sample_type != sample_type:
+                continue
+            if channel_id and block_channel_id != channel_id:
+                continue
+
+            self.blocks[key].flush(self.file_handle)
+            del self.blocks[key]
+
+    def _write_header(self):
+        resd_header.build_stream({
+            'version': __VERSION__,
+        }, self.file_handle)
+
+
+class RESDBlock:
+    def __init__(self, sample_type, block_type, channel_id):
+        self.sample_type = sample_type
+        self.block_type = block_type
+        self.channel_id = channel_id
+        self.block_metadata = RESDBlockMetadata()
+        self.samples = []
+
+    @property
+    def metadata(self):
+        return self.block_metadata
+
+    def flush(self, file):
+        metadata = self.metadata.build()
+        data_size = (
+            data_block_subheader.sizeof(header={'block_type': self.block_type}) +
+            metadata['size'] + 8 +
+            self._samples_sizeof()
+        )
+
+        header = self._header(data_size)
+        subheader = self._subheader()
+        data_block.build_stream({
+            'header': header,
+            'subheader': subheader,
+            'metadata': metadata,
+            'samples': self.samples,
+        }, file)
+
+    def _header(self, data_size):
+        return {
+            'block_type': self.block_type,
+            'sample_type': self.sample_type,
+            'channel_id': self.channel_id,
+            'data_size': data_size,
+        }
+
+    def _subheader(self):
+        return None
+
+    def _samples_sizeof(self):
+        pass
+
+    @classmethod
+    def _wrap_sample(cls, sample):
+        if isinstance(sample, bytes):
+            sample = {
+                'size': len(sample),
+                'data': sample,
+            }
+        return sample
+
+
+class RESDBlockConstantFrequency(RESDBlock):
+    __period = int(1e9)
+    __start_time = 0
+
+    @property
+    def period(self):
+        return self.__period
+
+    @period.setter
+    def period(self, value):
+        self.__period = value
+
+    @property
+    def frequency(self):
+        return 1e9 / self.__period
+
+    @frequency.setter
+    def frequency(self, value):
+        self.__period = int(1e9 / value)
+
+    @property
+    def start_time(self):
+        return self.__start_time
+
+    @start_time.setter
+    def start_time(self, value):
+        self.__start_time = value
+
+    def add_sample(self, sample):
+        self.samples.append({'sample': RESDBlock._wrap_sample(sample)})
+
+    def _subheader(self):
+        return {
+            'start_time': self.__start_time,
+            'period': self.__period
+        }
+
+    def _samples_sizeof(self):
+        return sum(len(data_block_sample_frequency(self.sample_type).build(sample)) for sample in self.samples)
+
+
+class RESDBlockArbitraryTimestamp(RESDBlock):
+    __start_time = 0
+
+    @property
+    def start_time(self):
+        return self.__start_time
+
+    @start_time.setter
+    def start_time(self, value):
+        self.__start_time = value
+
+    def add_sample(self, sample, timestamp):
+        self.samples.append({'sample': RESDBlock._wrap_sample(sample), 'timestamp': timestamp})
+
+    def _subheader(self):
+        return {
+            'start_time': self.__start_time,
+        }
+
+    def _samples_sizeof(self):
+        return sum(len(data_block_sample_arbitrary(self.sample_type).build(sample)) for sample in self.samples)
+
+
+class RESDBlockMetadata:
+    def __init__(self):
+        self.metadata = []
+        self.keys = set()
+
+    def __getattr__(self, name):
+        prefix = 'insert_'
+        if name[:len(prefix)] != prefix:
+            return None
+
+        method = name[len(prefix):]
+        type_idx = ({
+            'int8':   0x01,
+            'uint8':  0x02,
+            'int16':  0x03,
+            'uint16': 0x04,
+            'int32':  0x05,
+            'uint32': 0x06,
+            'int64':  0x07,
+            'uint64': 0x08,
+            'float':  0x09,
+            'double': 0x0A,
+            'text':   0x0B,
+            'blob':   0x0C,
+        }).get(method, None)
+
+        if method is None:
+            return None
+
+        return lambda key, value: self._insert(type_idx, key, value)
+
+    def build(self):
+        return {'items': self.metadata, 'size': self._sizeof()}
+
+    def remove(self, key):
+        if key not in self.keys:
+            return
+        self.keys.remove(key)
+        index = next(i for i, value in enumerate(self.metadata) if value['key'] == key)
+        self.metadata.pop(index)
+
+    def _sizeof(self):
+        return sum(len(data_block_metadata_item.build(item)) for item in self.metadata)
+
+    def _insert(self, type_idx, key, value):
+        self.remove(key)
+        self.keys.add(key)
+        self.metadata.append({
+            'type': type_idx,
+            'key': key,
+            'value': value
+        })