Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
46e46a7
Add the DmaEngine implementation and the test.
BenkangPeng Jun 1, 2026
d64eded
[Test] Update the test of DmaEngine.
BenkangPeng Jun 1, 2026
f968239
Add DMA support to DataMemControllerRTL and implement corresponding t…
BenkangPeng Jun 2, 2026
7bd704e
Add the dma ports into CgraTemplateRTL
BenkangPeng Jun 2, 2026
72d9531
Wrap the Cgra and Dma into one single module.
BenkangPeng Jun 2, 2026
46cfb8e
[Script] Add the local_CI script file
BenkangPeng Jun 2, 2026
3d4ec4e
Update .gitignore to ignore the log file
BenkangPeng Jun 2, 2026
e90d45b
[Test] Add the test for CgraDmaRTL
BenkangPeng Jun 2, 2026
8fe1e76
[Fix] Fix the bit mismatch error between dma_idx and num_xbar_in_ports.
BenkangPeng Jun 2, 2026
bf28acc
[Doc] Add some comments
BenkangPeng Jun 2, 2026
8480563
[Fix] Fix the bit mismatch by type conversion
BenkangPeng Jun 3, 2026
25c17cb
Move some constant into common header file
BenkangPeng Jun 3, 2026
e59d782
[Refactor] Wrap the signals between dma and dram with SendIfcRTL and …
BenkangPeng Jun 3, 2026
4b994de
[Refactor] Update DMA command handling in CgraDmaRTL and CgraTemplate…
BenkangPeng Jun 13, 2026
241bee7
[Refactor] Simplify DMA interface connections in CgraDmaRTL, CgraTemp…
BenkangPeng Jun 13, 2026
a125202
[Fix] Use Outport instead of Wire in DmaWireIfcRTL to avoid the RTLIR…
BenkangPeng Jun 14, 2026
4112ec3
[CleanUp] Remove the unnecessary ports.
BenkangPeng Jun 14, 2026
43da86d
[Feature] Introduce DMA data structure and DMA-to-DRAM write request …
BenkangPeng Jun 14, 2026
6e647dd
[Refactor] Pass DmaCmdType and DmaDataType into DataMemController and…
BenkangPeng Jun 14, 2026
78a1587
[Refactor] Update DmaEngineRTL to use DmaDramWrReqIfcRTL for DRAM wri…
BenkangPeng Jun 14, 2026
6fb7e50
[Refactor] Enhance DMA integration in CgraTemplateRTL and ControllerR…
BenkangPeng Jun 14, 2026
a7618d8
[Refactor] Update CgraDmaRTL to utilize DmaDramWrReqIfcRTL for DRAM w…
BenkangPeng Jun 14, 2026
1bf3b79
[Fix] Fix the bitwidth mismatch error between DataType and DmaSpmData…
BenkangPeng Jun 15, 2026
d4ce981
[CleanUp] Update DMA attribute references to use new constants for im…
BenkangPeng Jun 15, 2026
bca3100
[Rename][NFC] Rename some variables for clarity
BenkangPeng Jun 17, 2026
075f63f
Add the assertion to ensure the number of transfer data is the multipl…
BenkangPeng Jun 22, 2026
628e2d3
Add assertions to ensure that the number of bytes transferred by DMA …
BenkangPeng Jun 22, 2026
90023f2
[Refactor] Remove DmaWireIfcRTL and DmaSpmWireIfcRTL. Use ValRdyRecv/…
BenkangPeng Jun 23, 2026
37a363e
Split the dma_spm_to_dram into 3 signals.
BenkangPeng Jun 23, 2026
af3c0a6
Deprecate the DmaSpmMasterRTL in DMA module
BenkangPeng Jun 23, 2026
0fb1b5a
Refactor DataMemControllerRTL to replace DmaSpmMinionIfcRTL with ValR…
BenkangPeng Jun 23, 2026
0bb2d9c
Refactor CgraDmaRTL and CgraTemplateRTL to replace DmaSpmMinionIfcRTL…
BenkangPeng Jun 23, 2026
06eeec4
Add CgraDmaRTL wrapper integrating CGRA with DMA engine and correspon…
BenkangPeng Jun 23, 2026
33622ea
Refactor CgraDmaRTL to replace DmaDramWrReqIfcRTL with new DMA DRAM w…
BenkangPeng Jun 23, 2026
832f701
Refactor DMA signal handling across multiple components to improve cl…
BenkangPeng Jun 23, 2026
94ca68a
[Fix] Precompute commonly used values in DmaEngineRTL to avoid PyMTL3…
BenkangPeng Jun 23, 2026
282159d
Add Verilog generation functionality for the new wrapper.
BenkangPeng Jun 23, 2026
b772b7b
Enhance DMA documentation in messages.py and DmaEngineRTL.py by addin…
BenkangPeng Jun 24, 2026
089e4ba
[Rename] Rename tag to dma_tag
BenkangPeng Jun 25, 2026
0ed4b3c
[Rename] Update references from 'ctrl' to 'controller'. Enhance docum…
BenkangPeng Jun 26, 2026
618d6e1
[Fix] Update dma_cmd string representation to use 'dma_tag' instead o…
BenkangPeng Jun 26, 2026
04a2a4f
Add warning comment in ControllerRTL.py regarding potential conflict …
BenkangPeng Jun 26, 2026
304ae24
[Fix] Update DmaEngineRTL to use dma_tag
BenkangPeng Jun 27, 2026
85fcd19
[Fix] Update ControllerRTL and DmaEngineRTL to consistently use 'dma_…
BenkangPeng Jun 27, 2026
559c419
Refactor DmaEngineRTL to simplify word calculation logic
BenkangPeng Jun 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ build
__pycache__
.hypothesis
.vscode
*.log
64 changes: 61 additions & 3 deletions cgra/CgraTemplateRTL.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ def construct(s, CgraPayloadType,
provided_max_per_cgra_rows = None,
provided_max_per_cgra_cols = None,
provided_max_num_rd_tiles = None,
provided_max_num_wr_tiles = None):
provided_max_num_wr_tiles = None,
has_dma_ports = False,
DmaDataType = mk_dma_data(),
DmaCmdType = mk_dma_cmd()):
"""
provided_max_per_cgra_rows: the row number of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch.
provided_max_per_cgra_cols: the column number of the largest cgra in the multi heterogeneous cgra architecture. None for single cgra arch or Homogeneous multi-cgra arch.
Expand Down Expand Up @@ -126,6 +129,14 @@ def construct(s, CgraPayloadType,
CtrlRingPos = mk_ring_pos(max_num_tiles + 1)
CtrlAddrType = mk_bits(clog2(ctrl_mem_size))
DataAddrType = mk_bits(clog2(data_mem_size_global))
DmaTagType = DmaCmdType.get_field_type(kAttrDmaTag)
DmaSpmDataType = DmaDataType.get_field_type(kAttrSpmData)
DmaSpmAddrType = DmaCmdType.get_field_type(kAttrSpmAddr)
DmaDoneType = mk_dma_done(DmaTagType.nbits)
DmaSpmWriteReqType = mk_dma_spm_write_req(DmaSpmAddrType.nbits,
DmaSpmDataType.nbits)
DmaSpmReadReqType = mk_dma_spm_read_req(DmaSpmAddrType.nbits)
DmaSpmReadRespType = mk_dma_spm_read_resp(DmaSpmDataType.nbits)
assert(data_mem_size_per_bank * num_banks_per_cgra <= \
data_mem_size_global)

Expand All @@ -135,6 +146,21 @@ def construct(s, CgraPayloadType,
s.recv_from_inter_cgra_noc = RecvIfcRTL(NocPktType)
s.send_to_inter_cgra_noc = SendIfcRTL(NocPktType)

# Optional DMA engine-facing ports. The controller owns command decode and
# forwards DMA SPM access to the data memory.
if has_dma_ports:
s.dma_cmd = SendIfcRTL(DmaCmdType)

s.dma_done = RecvIfcRTL(DmaDoneType)

# Receive the request of writing into SPM from the DMA.
s.recv_from_dma_spm_wr_req = RecvIfcRTL(DmaSpmWriteReqType)
# Receive the request of reading from SPM from the DMA.
s.recv_from_dma_spm_rd_req = RecvIfcRTL(DmaSpmReadReqType)
# Send the response of reading from SPM to the DMA.
s.send_to_dma_spm_rd_resp = SendIfcRTL(DmaSpmReadRespType)


if is_multi_cgra:
# Use the largest CGRA shape to set the boundary ports for compatibility in the case of heterogeneous multi-cgra.
# Remember to ground the remaining boundary ports of the current CGRA when the current CGRA has fewer rows or columns than the largest CGRA.
Expand Down Expand Up @@ -168,11 +194,17 @@ def construct(s, CgraPayloadType,
multi_cgra_columns,
max_num_tiles,
mem_access_is_combinational,
idTo2d_map)
idTo2d_map,
has_dma_ports,
DmaCmdType,
DmaDataType)
s.cgra_id = InPort(CgraIdType)
s.controller = ControllerRTL(NocPktType,
multi_cgra_rows, multi_cgra_columns,
max_num_tiles, controller2addr_map, idTo2d_map)
max_num_tiles, controller2addr_map, idTo2d_map,
has_dma_ports,
DmaDataType,
DmaCmdType)
# Connects controller id.
s.controller.cgra_id //= s.cgra_id
# An additional router for controller to receive CMD_COMPLETE signal from Ring to CPU.
Expand All @@ -190,6 +222,32 @@ def construct(s, CgraPayloadType,
s.data_mem.address_lower //= s.address_lower
s.data_mem.address_upper //= s.address_upper

if has_dma_ports:
# CPU packets are decoded by the controller before becoming DMA commands.
s.dma_cmd //= s.controller.dma_cmd
s.dma_done //= s.controller.dma_done

s.recv_from_dma_spm_wr_req //= s.controller.recv_from_dma_spm_wr_req
s.recv_from_dma_spm_rd_req //= s.controller.recv_from_dma_spm_rd_req
s.send_to_dma_spm_rd_resp //= s.controller.send_to_dma_spm_rd_resp

else:
# Grounds the DMA ports when no DMA engine is attached.
s.controller.dma_cmd.rdy //= 0
s.controller.dma_done.val //= 0
s.controller.dma_done.msg //= DmaDoneType()

s.controller.recv_from_dma_spm_wr_req.val //= 0
s.controller.recv_from_dma_spm_wr_req.msg //= DmaSpmWriteReqType()
s.controller.recv_from_dma_spm_rd_req.val //= 0
s.controller.recv_from_dma_spm_rd_req.msg //= DmaSpmReadReqType()
s.controller.send_to_dma_spm_rd_resp.rdy //= 0

# Controller <-> SPM/data_mem
s.controller.send_to_mem_spm_wr_req //= s.data_mem.recv_from_controller_spm_wr_req
s.controller.send_to_mem_spm_rd_req //= s.data_mem.recv_from_controller_spm_rd_req
s.controller.recv_from_mem_spm_rd_resp //= s.data_mem.send_to_controller_spm_rd_resp

# Connects data memory with controller.
s.data_mem.recv_from_noc_load_request //= s.controller.send_to_mem_load_request
s.data_mem.recv_from_noc_store_request //= s.controller.send_to_mem_store_request
Expand Down
194 changes: 194 additions & 0 deletions cgra/IntegratedCgraWithDmaRTL.py
Comment thread
tancheng marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
"""
=========================================================================
IntegratedCgraWithDmaRTL.py
=========================================================================

Wrapper that composes a CGRA template with a DMA engine attached to the
CGRA data SPM.
"""

from pymtl3 import *

from .CgraTemplateRTL import CgraTemplateRTL
from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL
from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL
from ..lib.messages import *
from ..lib.util.data_struct_attr import *
from ..mem.dma.DmaEngineRTL import DmaEngineRTL


class IntegratedCgraWithDmaRTL( Component ):
  """
  IntegratedCgraWithDmaRTL is a top-level wrapper that integrates a CGRA
  instance with a DMA engine.

  Architectural Design:
    - It instantiates a standard CGRA template (`CgraTemplateRTL`) and a
      DMA engine (`DmaEngineRTL`).
    - CPU control packets are passed through to the CGRA's controller.
      DMA commands are decoded there.
    - The DMA engine accesses the CGRA's internal data SPM through
      controller-forwarded ports; it is not connected directly to
      `DataMemControllerRTL`.
    - External memory requests from the DMA engine are exposed at the top
      level to be connected to a DRAM model or an AXI adapter.
    - Boundary data ports for multi-CGRA configurations are also passed
      through if enabled.
  """

  def construct(s, CgraPayloadType,
                multi_cgra_rows,
                multi_cgra_columns,
                per_cgra_rows, per_cgra_columns,
                ctrl_mem_size, data_mem_size_global,
                data_mem_size_per_bank, num_banks_per_cgra,
                num_registers_per_reg_bank, num_ctrl,
                total_steps, mem_access_is_combinational,
                FunctionUnit, FuList, TileList, LinkList,
                dataSPM, controller2addr_map, idTo2d_map,
                is_multi_cgra = True, cgra_id = 0,
                # For heterogeneous multi-cgra support.(maybe remove it in IntegratedCgraWithDmaRTL for simplicity?)
                provided_max_per_cgra_rows = None,
                provided_max_per_cgra_cols = None,
                provided_max_num_rd_tiles = None,
                provided_max_num_wr_tiles = None):
    """
    Builds the CGRA, the DMA engine, and the wiring between them.

    Every argument is forwarded unchanged (and in the same order) to
    `CgraTemplateRTL`; only the ones below are additionally used by this
    wrapper itself:

    CgraPayloadType: its `kAttrData` field gives the data type of the
      boundary ports; that type's `kAttrPayload` field must be 32 bits wide.
    multi_cgra_rows, multi_cgra_columns: used to derive the CPU/NoC packet
      types and the CGRA id type.
    per_cgra_rows, per_cgra_columns: default boundary-port counts when the
      `provided_max_per_cgra_*` overrides are None.
    data_mem_size_global: sizes the `address_lower`/`address_upper` ports.
    dataSPM: `getNumOfValidReadPorts()` supplies the default number of read
      tiles when `provided_max_num_rd_tiles` is None.
    is_multi_cgra: when True, the inter-CGRA NoC and boundary data ports are
      created on the wrapper and connected to the inner CGRA.
    provided_max_per_cgra_rows, provided_max_per_cgra_cols,
    provided_max_num_rd_tiles: optional overrides of the values above.
    """

    DataType = CgraPayloadType.get_field_type(kAttrData)
    data_bitwidth = DataType.get_field_type(kAttrPayload).nbits
    # The DMA SPM data width below is fixed at 32 bits, so only 32-bit CGRA
    # payloads are accepted here.
    assert data_bitwidth == 32, \
        f"IntegratedCgraWithDmaRTL requires a 32-bit payload, got {data_bitwidth}"

    # Fall back to this CGRA's own shape / SPM read-port count when no
    # explicit maximum is provided.
    max_per_cgra_rows = provided_max_per_cgra_rows if provided_max_per_cgra_rows is not None else per_cgra_rows
    max_per_cgra_cols = provided_max_per_cgra_cols if provided_max_per_cgra_cols is not None else per_cgra_columns
    max_num_tiles = max_per_cgra_rows * max_per_cgra_cols
    max_num_rd_tiles = provided_max_num_rd_tiles if provided_max_num_rd_tiles is not None else dataSPM.getNumOfValidReadPorts()

    CtrlPktType = mk_intra_cgra_pkt(multi_cgra_columns, multi_cgra_rows,
                                    max_num_tiles, CgraPayloadType)
    NocPktType = mk_inter_cgra_pkt(multi_cgra_columns, multi_cgra_rows,
                                   max_num_tiles, max_num_rd_tiles,
                                   CgraPayloadType)

    CgraIdType = mk_cgra_id_type(multi_cgra_columns, multi_cgra_rows)
    DataAddrType = mk_bits(clog2(data_mem_size_global))

    # DMA message formats. The bit widths are fixed by this wrapper.
    DmaCmdType = mk_dma_cmd(dram_addr_nbits = 64,
                            spm_addr_nbits = 32,
                            bytes_nbits = 32,
                            tag_nbits = 8)

    DmaDataType = mk_dma_data(dram_data_nbits = 128,
                              dram_mask_nbits = 16,
                              spm_data_nbits = 32)

    DmaDramAddrType = DmaCmdType.get_field_type(kAttrDramAddr)
    DmaMemDataType = DmaDataType.get_field_type(kAttrDramData)
    DmaMemMaskType = DmaDataType.get_field_type(kAttrDramMask)
    DmaDramWrReqType = mk_dma_dram_wr_req(DmaDramAddrType.nbits,
                                          DmaMemDataType.nbits,
                                          DmaMemMaskType.nbits)

    # Existing CGRA-facing interfaces.
    # CGRA <-> CPU
    s.recv_from_cpu_pkt = RecvIfcRTL(CtrlPktType)
    s.send_to_cpu_pkt = SendIfcRTL(CtrlPktType)

    # NOTE(review): the extent of this `if` was inferred from the matching
    # connection section below — confirm that both the NoC and the boundary
    # ports are meant to be conditional on is_multi_cgra.
    if is_multi_cgra:
      s.recv_from_inter_cgra_noc = RecvIfcRTL(NocPktType)
      s.send_to_inter_cgra_noc = SendIfcRTL(NocPktType)

      s.recv_data_on_boundary_north = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_cols)]
      s.send_data_on_boundary_north = [SendIfcRTL(DataType) for _ in range(max_per_cgra_cols)]
      s.recv_data_on_boundary_south = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_cols)]
      s.send_data_on_boundary_south = [SendIfcRTL(DataType) for _ in range(max_per_cgra_cols)]
      s.recv_data_on_boundary_west = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_rows)]
      s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(max_per_cgra_rows)]
      s.recv_data_on_boundary_east = [RecvIfcRTL(DataType) for _ in range(max_per_cgra_rows)]
      s.send_data_on_boundary_east = [SendIfcRTL(DataType) for _ in range(max_per_cgra_rows)]

    s.cgra_id = InPort(CgraIdType)
    # The local address range of current CGRA.
    # Any address out of this range will be assumed as remote address.
    s.address_lower = InPort(DataAddrType)
    s.address_upper = InPort(DataAddrType)

    # Abstract external DRAM interfaces for the internal DMA engine.

    s.send_to_dram_rd_req = SendIfcRTL(DmaDramAddrType)
    s.recv_from_dram_rd_resp = RecvIfcRTL(DmaMemDataType)

    s.send_to_dram_wr_req = SendIfcRTL(DmaDramWrReqType)
    # The DRAM write response carries a single bit.
    s.recv_from_dram_wr_resp = RecvIfcRTL(mk_bits(1))

    # Components.

    s.cgra = CgraTemplateRTL(CgraPayloadType,
                             multi_cgra_rows,
                             multi_cgra_columns,
                             per_cgra_rows, per_cgra_columns,
                             ctrl_mem_size, data_mem_size_global,
                             data_mem_size_per_bank, num_banks_per_cgra,
                             num_registers_per_reg_bank, num_ctrl,
                             total_steps, mem_access_is_combinational,
                             FunctionUnit, FuList, TileList, LinkList,
                             dataSPM, controller2addr_map, idTo2d_map,
                             is_multi_cgra, cgra_id,
                             provided_max_per_cgra_rows,
                             provided_max_per_cgra_cols,
                             provided_max_num_rd_tiles,
                             provided_max_num_wr_tiles,
                             has_dma_ports = True,
                             DmaDataType = DmaDataType,
                             DmaCmdType = DmaCmdType)

    # The DMA engine is sized from the same message types handed to the
    # CGRA, so both sides agree on every field width.
    DmaSpmDataType = DmaDataType.get_field_type(kAttrSpmData)
    DmaSpmAddrType = DmaCmdType.get_field_type(kAttrSpmAddr)
    DmaBytesType = DmaCmdType.get_field_type(kAttrNBytes)
    DmaTagType = DmaCmdType.get_field_type(kAttrDmaTag)
    s.dma = DmaEngineRTL(spm_data_nbits = DmaSpmDataType.nbits,
                         dram_data_nbits = DmaMemDataType.nbits,
                         dram_addr_nbits = DmaDramAddrType.nbits,
                         spm_addr_nbits = DmaSpmAddrType.nbits,
                         bytes_nbits = DmaBytesType.nbits,
                         tag_nbits = DmaTagType.nbits)

    # CGRA passthrough connections.

    s.recv_from_cpu_pkt //= s.cgra.recv_from_cpu_pkt
    s.send_to_cpu_pkt //= s.cgra.send_to_cpu_pkt

    if is_multi_cgra:
      s.recv_from_inter_cgra_noc //= s.cgra.recv_from_inter_cgra_noc
      s.send_to_inter_cgra_noc //= s.cgra.send_to_inter_cgra_noc

      for i in range(max_per_cgra_cols):
        s.recv_data_on_boundary_north[i] //= s.cgra.recv_data_on_boundary_north[i]
        s.send_data_on_boundary_north[i] //= s.cgra.send_data_on_boundary_north[i]
        s.recv_data_on_boundary_south[i] //= s.cgra.recv_data_on_boundary_south[i]
        s.send_data_on_boundary_south[i] //= s.cgra.send_data_on_boundary_south[i]

      for i in range(max_per_cgra_rows):
        s.recv_data_on_boundary_west[i] //= s.cgra.recv_data_on_boundary_west[i]
        s.send_data_on_boundary_west[i] //= s.cgra.send_data_on_boundary_west[i]
        s.recv_data_on_boundary_east[i] //= s.cgra.recv_data_on_boundary_east[i]
        s.send_data_on_boundary_east[i] //= s.cgra.send_data_on_boundary_east[i]

    s.cgra_id //= s.cgra.cgra_id
    s.address_lower //= s.cgra.address_lower
    s.address_upper //= s.cgra.address_upper

    # Connections between CGRA and DMA engine.
    # CGRA communicates with DMA engine through the controller.
    s.cgra.dma_cmd //= s.dma.dma_cmd
    s.dma.dma_done //= s.cgra.dma_done

    # DMA <-> external DRAM, exposed on the wrapper's top-level ports.
    s.send_to_dram_rd_req //= s.dma.send_to_dram_rd_req
    s.recv_from_dram_rd_resp //= s.dma.recv_from_dram_rd_resp

    s.send_to_dram_wr_req //= s.dma.send_to_dram_wr_req
    s.recv_from_dram_wr_resp //= s.dma.recv_from_dram_wr_resp

    # DMA to controller-forwarded SPM connections.

    s.dma.send_to_spm_wr_req //= s.cgra.recv_from_dma_spm_wr_req
    s.dma.send_to_spm_rd_req //= s.cgra.recv_from_dma_spm_rd_req
    s.dma.recv_from_spm_rd_resp //= s.cgra.send_to_dma_spm_rd_resp

  def line_trace(s):
    """Returns the DMA engine trace followed by the CGRA trace, separated by ' || '."""
    return f"{s.dma.line_trace()} || {s.cgra.line_trace()}"
Loading
Loading