# The following are the configuration files for an Eyeriss-like design.
architecture:
  # ============================================================
  # Architecture Description
  # ============================================================
  # Hierarchy, outermost to innermost:
  #   system  -> DRAM
  #   eyeriss -> shared_glb, DummyBuffer[0..13]
  #   PE[0..167] -> ifmap_spad, weights_spad, psum_spad, mac
  # Nesting is expressed purely through indentation; every `local` list
  # holds the components of the enclosing level and every `subtree` list
  # holds the next level down.
  version: 0.3
  subtree:
    - name: system
      local:
        # Off-chip memory; width 64 = block-size 4 x word-bits 16.
        - name: DRAM
          class: DRAM
          attributes:
            type: LPDDR4
            width: 64
            block-size: 4
            word-bits: 16
      subtree:
        - name: eyeriss
          attributes:
            technology: 45nm
          local:
            # Global buffer; memory_width 64 = block-size 4 x word-bits 16.
            - name: shared_glb
              class: smartbuffer_SRAM
              attributes:
                memory_depth: 16384
                memory_width: 64
                n_banks: 32
                block-size: 4
                word-bits: 16
                read_bandwidth: 16
                write_bandwidth: 16
            # Instances 0..13, matching meshX: 14 below.
            - name: DummyBuffer[0..13]  # for better mapping
              class: regfile
              attributes:
                depth: 16
                width: 16
                word-bits: 16
                block-size: 1
                meshX: 14
          subtree:
            # Instances 0..167; every PE component carries meshX: 14.
            - name: PE[0..167]
              local:
                # Per-PE scratchpads: one word (16 bits) per entry.
                - name: ifmap_spad
                  class: smartbuffer_RF
                  attributes:
                    memory_depth: 12
                    memory_width: 16
                    block-size: 1
                    word-bits: 16
                    meshX: 14
                    read_bandwidth: 2
                    write_bandwidth: 2
                - name: weights_spad
                  class: smartbuffer_RF
                  attributes:
                    memory_depth: 192
                    memory_width: 16
                    block-size: 1
                    word-bits: 16
                    meshX: 14
                    read_bandwidth: 2
                    write_bandwidth: 2
                - name: psum_spad
                  class: smartbuffer_RF
                  attributes:
                    memory_depth: 16
                    memory_width: 16
                    update_fifo_depth: 2
                    block-size: 1
                    word-bits: 16
                    meshX: 14
                    read_bandwidth: 2
                    write_bandwidth: 2
                # Compute unit of the PE.
                - name: mac
                  class: intmac
                  attributes:
                    datawidth: 16
                    meshX: 14
#
# The following constraints are limitations of the hardware architecture and dataflow
#
architecture_constraints:
  # Each list item constrains one storage level (`target`) for one
  # constraint `type` (bypass, spatial or temporal). A level may appear
  # several times, once per constraint type.
  # NOTE(review): a factor of 0 (e.g. S=0, R=0) presumably means "no fixed
  # bound / take the remaining dimension" -- confirm against the Timeloop
  # constraints documentation.
  targets:
    # certain buffers only store certain datatypes
    - target: psum_spad
      type: bypass
      bypass: [Inputs, Weights]
      keep: [Outputs]
    - target: weights_spad
      type: bypass
      bypass: [Inputs, Outputs]
      keep: [Weights]
    - target: ifmap_spad
      type: bypass
      bypass: [Weights, Outputs]
      keep: [Inputs]
    # DummyBuffer keeps nothing: all three datatypes bypass it.
    - target: DummyBuffer
      type: bypass
      bypass: [Inputs, Outputs, Weights]
    - target: shared_glb
      type: bypass
      bypass: [Weights]
      keep: [Inputs, Outputs]
    # The blank in the permutation sits at the `split` position (4).
    - target: DummyBuffer
      type: spatial
      split: 4
      permutation: NPQR SCM
      factors: N=1 P=1 Q=1 R=1 S=0
    # only allow fanout of M, Q out from glb
    - target: shared_glb
      type: spatial
      split: 7
      permutation: NCPRSQM
      factors: N=1 C=1 P=1 R=1 S=1
    # one ofmap position but of different output channels
    - target: psum_spad
      type: temporal
      permutation: NCPQRS M
      factors: N=1 C=1 R=1 S=1 P=1 Q=1
    # row stationary -> 1 row at a time
    - target: weights_spad
      type: temporal
      permutation: NMPQS CR
      factors: N=1 M=1 P=1 Q=1 S=1 R=0
    - target: ifmap_spad
      type: temporal
      permutation: NMCPQRS
      factors: N=1 M=1 C=1 P=1 Q=1 R=1 S=1
    # enforce the hardware limit of bypassing everything
    - target: DummyBuffer
      type: temporal
      factors: N=1 M=1 C=1 P=1 Q=1 R=1 S=1
#
# The following constraints are not limitations of the hardware architecture and dataflow,
# but help limit the search space to speed up search
#
mapspace_constraints:
  # Search-space pruning hints only; each list item constrains the temporal
  # loop order and/or factors of one storage level (`target`).
  targets:
    # intuitive optimization to reduce map space size
    # the factors of these are 1 anyway, so the order does not really matter
    - target: DummyBuffer
      type: temporal
      permutation: NMCPQRS
    # intuitive optimization for row stationary
    # -> process a row/col of the output before going to the next one
    # NOTE(review): P=0 presumably leaves P unconstrained at this level --
    # confirm against the Timeloop constraints documentation.
    - target: shared_glb
      type: temporal
      permutation: QRSC PNM
      factors: Q=1 R=1 S=1 P=0
    # intuitive optimization to reduce map space size
    - target: DRAM
      type: temporal
      permutation: RSP CMNQ
      factors: R=1 S=1 P=1