# Double buffer read memory implementation
# TODO: Verification Pending
import math
import numpy as np
from tqdm import tqdm
from scalesim.memory.read_port import read_port
class read_buffer:
    """
    Memory simulation of a double buffered ifmap/filter read SRAM.

    The SRAM is split into an active part, which services the requests coming from the
    systolic array, and a prefetch part, which is filled from the backing buffer in the
    meantime so that the backing (DRAM) latency can be hidden.

    The operand addresses are kept in two forms:

    * ``fetch_matrix``: the addresses in fetch order, one row per backing-buffer request
      (``req_gen_bandwidth`` words per row, padded with -1). It is used to build the
      prefetch requests and must never be modified by them.
    * ``hashed_buffer``: the same addresses grouped into "lines" (Python sets) for fast
      hit checks. The active/prefetch windows are ranges of line ids which wrap around.
    """

    # The SRAM capacity is divided into this many lines (sets) for the hit lookups
    _NUM_HASHED_SETS = 100

    def __init__(self):
        """
        The constructor method for the class. Every attribute starts at the default
        value assigned by reset().
        """
        self.reset()

    def set_params(self, backing_buf_obj,
                   total_size_bytes=1, word_size=1, active_buf_frac=0.9,
                   hit_latency=1, backing_buf_bw=1
                   ):
        """
        Method to set the ifmap/filter double buffered memory simulation parameters for housekeeping.

        :param backing_buf_obj: Backing buffer object, by default is read_port
        :param total_size_bytes: Read buffer (SRAM) total size in bytes
        :param word_size: The word size of individual elements
        :param active_buf_frac: The active fraction of the double buffered ifmap/filter memory
            (serving the systolic array memory requests). Must be in [0.5, 1)
        :param hit_latency: Hit latency of the double buffered ifmap/filter memory
        :param backing_buf_bw: Bandwidth of the backing buffer in words per cycle.
            The default backing buffer is a dummy one (read port).

        :return: None
        """
        # Validate before touching any attribute so a bad call leaves the object unchanged
        assert 0.5 <= active_buf_frac < 1, "Valid active buf frac [0.5,1)"

        self.total_size_bytes = total_size_bytes
        self.word_size = word_size
        self.active_buf_frac = round(active_buf_frac, 2)
        self.hit_latency = hit_latency

        self.backing_buffer = backing_buf_obj
        self.req_gen_bandwidth = backing_buf_bw

        # Calculate these based on the values provided
        self._compute_buffer_sizes()

    def reset(self):
        """
        Method to reset the read buffer parameters and all of its simulation state.

        :return: None
        """
        # Buffer properties: User specified
        self.total_size_bytes = 128
        self.word_size = 1          # Bytes
        self.active_buf_frac = 0.9
        self.hit_latency = 1        # Cycles after which a request is served if already in the buffer

        # Backing interface properties
        self.backing_buffer = read_port()
        self.req_gen_bandwidth = 100        # words per cycle

        # Buffer properties: Calculated
        self._compute_buffer_sizes()

        # Status, prefetch pointers, counters, trace and flags
        self._reset_state()

    def _compute_buffer_sizes(self):
        """
        Helper which derives the element counts of the active and prefetch parts from the
        user specified size, word size and active fraction.

        :return: None
        """
        self.total_size_elems = math.floor(self.total_size_bytes / self.word_size)
        self.active_buf_size = int(math.ceil(self.total_size_elems * self.active_buf_frac))
        self.prefetch_buf_size = self.total_size_elems - self.active_buf_size

    def _reset_state(self):
        """
        Helper which clears everything produced by a simulation run, leaving the user
        specified parameters and the backing buffer untouched.

        :return: None
        """
        # Status of the buffer
        self.hashed_buffer = dict()
        self.num_lines = 0
        self.num_active_buf_lines = 1
        self.num_prefetch_buf_lines = 1
        self.active_buffer_set_limits = []
        self.prefetch_buffer_set_limits = []

        # Variables to enable prefetching
        self.fetch_matrix = np.ones((1, 1))
        self.last_prefect_cycle = -1
        self.next_line_prefetch_idx = 0
        self.next_col_prefetch_idx = 0

        # Access counts
        self.num_access = 0

        # Trace matrix
        self.trace_matrix = np.ones((1, 1))

        # Flags
        self.active_buf_full_flag = False
        self.hashed_buffer_valid = False
        self.trace_valid = False

    def set_fetch_matrix(self, fetch_matrix_np):
        """
        Method to set the fetch matrix responsible for prefetching from the DRAM.

        The 2D operand matrix is re-laid out in row-major order into rows of
        ``req_gen_bandwidth`` words; the unused tail of the last row is filled with -1.

        :param fetch_matrix_np: 2D matrix with the operand addresses in fetch order

        :return: None
        """
        # The operand matrix determines what to pre-fetch into both active and prefetch buffers
        fetch_matrix_np = np.asarray(fetch_matrix_np)
        num_elems = fetch_matrix_np.shape[0] * fetch_matrix_np.shape[1]
        num_lines = int(math.ceil(num_elems / self.req_gen_bandwidth))

        # Start from an all -1 (invalid) matrix and copy the operands over in flat order,
        # which avoids any shape mismatch between the operand and the request width
        reshaped = np.full((num_lines, self.req_gen_bandwidth), -1.0)
        reshaped.flat[:num_elems] = fetch_matrix_np.ravel()
        self.fetch_matrix = reshaped

        # Once the fetch matrices are set, populate the data structure for fast lookups and servicing
        self.prepare_hashed_buffer()

    def prepare_hashed_buffer(self):
        """
        Method to convert the fetch matrix into a hashed buffer for fast lookups.

        The valid (non -1) addresses are grouped, in fetch order, into lines of
        ``elems_per_set`` addresses each. A trailing line is always kept open, so it is
        empty when the number of valid addresses is a multiple of ``elems_per_set``.

        :return: None
        """
        # Guard against a zero sized buffer, which would otherwise divide by zero below
        elems_per_set = max(1, math.ceil(self.total_size_elems / self._NUM_HASHED_SETS))

        flat_addrs = self.fetch_matrix.ravel()
        valid_addrs = flat_addrs[flat_addrs != -1]

        # Build into a fresh dict so that lines of a previous fetch matrix cannot survive
        hashed_buffer = dict()
        for line_id, begin in enumerate(range(0, valid_addrs.size, elems_per_set)):
            hashed_buffer[line_id] = set(valid_addrs[begin:begin + elems_per_set].tolist())
        if valid_addrs.size % elems_per_set == 0:
            hashed_buffer[len(hashed_buffer)] = set()       # The open trailing line
        self.hashed_buffer = hashed_buffer

        max_num_active_buf_lines = int(math.ceil(self.active_buf_size / elems_per_set))
        max_num_prefetch_buf_lines = int(math.ceil(self.prefetch_buf_size / elems_per_set))
        num_lines = len(hashed_buffer)

        # The active part gets its share first, the prefetch part takes from what remains
        self.num_active_buf_lines = min(num_lines, max_num_active_buf_lines)
        remaining_lines = num_lines - self.num_active_buf_lines
        self.num_prefetch_buf_lines = min(remaining_lines, max_num_prefetch_buf_lines)

        self.num_lines = num_lines
        self.hashed_buffer_valid = True

    def active_buffer_hit(self, addr):
        """
        Method to check if the address is hit or miss in the active read buffer.

        :param addr: Address of the incoming memory request

        :return: True if address is hit and false if miss
        """
        assert self.active_buf_full_flag, 'Active buffer is not ready yet'

        start_id, end_id = self.active_buffer_set_limits
        if start_id < end_id:
            line_spans = (range(start_id, end_id),)
        else:
            # The active window wraps around the end of the hashed buffer
            line_spans = (range(start_id, self.num_lines), range(end_id))

        # Each line is a set, so every individual membership test is O(1)
        return any(addr in self.hashed_buffer[line_id]
                   for span in line_spans
                   for line_id in span)

    def service_reads(self, incoming_requests_arr_np,   # 2D array with the requests
                      incoming_cycles_arr):             # Vector with the cycles at which req arrived
        """
        Method to service read requests coming from systolic array.

        Logic: Always check if an addr is in active buffer.
        If hit, return with hit latency.
        Else, make the contents of prefetch buffer as active and then check.
        Continue making new prefetches until there is a hit.

        :param incoming_requests_arr_np: matrix containing the addresses of the memory requests
            made from the systolic array (one row per cycle, -1 marks an empty slot)
        :param incoming_cycles_arr: cycles at which the memory requests are made from the
            systolic array, either as a 1D vector or as a (N, 1) column

        :return: (N, 1) array of out cycles when the requests are serviced
        :raises RuntimeError: if an address cannot be brought into the active buffer even
            after the active window has been moved over the entire hashed buffer
        """
        num_requests = incoming_requests_arr_np.shape[0]
        if num_requests == 0:
            return np.zeros((0, 1))

        # Accept both a flat vector and a column of cycles
        cycles = np.asarray(incoming_cycles_arr).reshape(-1)

        if not self.active_buf_full_flag:
            # Needs to use the entire operand matrix keeping in mind the tile order and everything
            self.prefetch_active_buffer(start_cycle=cycles[0])

        # The active window visits every line within num_lines moves, so an address which is
        # still missing after that many prefetches can never be hit
        max_prefetches = self.num_lines + 1

        out_cycles_arr = []
        offset = self.hit_latency
        for i in range(num_requests):
            cycle = cycles[i]

            for addr in incoming_requests_arr_np[i]:
                if addr == -1:
                    continue

                prefetches_done = 0
                # While loop ensures multiple prefetches if needed
                while not self.active_buffer_hit(addr):
                    if prefetches_done >= max_prefetches:
                        raise RuntimeError(
                            'Address {} could not be brought into the active buffer after {} '
                            'prefetches; it is not in the fetch matrix or nothing can be '
                            'prefetched'.format(addr, prefetches_done))
                    self.new_prefetch()
                    prefetches_done += 1

                    # Only a prefetch which completes after the request would have been
                    # served delays it; an early prefetch must not pull the offset back
                    potential_stall_cycles = self.last_prefect_cycle - (cycle + offset)
                    if potential_stall_cycles > 0:
                        offset += potential_stall_cycles

            out_cycles_arr.append(cycle + offset)

        return np.asarray(out_cycles_arr).reshape((len(out_cycles_arr), 1))

    def prefetch_active_buffer(self, start_cycle):
        """
        Method to prefetch the active read buffer before servicing individual memory requests.

        :param start_cycle: Cycle of the first incoming read; the prefetch requests are
            scheduled so that all of them have been issued before it

        :return: None
        """
        bandwidth = self.req_gen_bandwidth
        num_rows = self.fetch_matrix.shape[0]

        # 1. Preparing the requests: as many fetch matrix lines as the active buffer holds
        num_lines = min(math.ceil(self.active_buf_size / bandwidth), num_rows)
        requested_data_size = num_lines * bandwidth
        self.num_access += requested_data_size

        # Copy, so that trimming the request below cannot erase addresses in the fetch matrix
        prefetch_requests = self.fetch_matrix[:num_lines, :].copy()

        # 1.1 See if extra requests are made, if so nullify them
        last_line_is_partial = requested_data_size > self.active_buf_size
        self.next_col_prefetch_idx = 0
        if last_line_is_partial:
            valid_cols = int(self.active_buf_size % bandwidth)
            prefetch_requests[-1, valid_cols:] = -1
            self.next_col_prefetch_idx = valid_cols

        # 2. Preparing the cycles array
        # Request i is issued at start_cycle - num_lines - latency + i, which ensures that all
        # the requests have been made before any incoming reads came
        first_request_cycle = start_cycle - num_lines - self.backing_buffer.get_latency()
        cycles_arr = np.arange(num_lines, dtype=float).reshape((num_lines, 1)) + first_request_cycle

        # 3. Send the request and get the response cycles count
        response_cycles_arr = self.backing_buffer.service_reads(incoming_cycles_arr=cycles_arr,
                                                                incoming_requests_arr_np=prefetch_requests)

        # 4. Update the variables
        self.last_prefect_cycle = int(response_cycles_arr[-1][0])

        # Update the trace matrix
        self.trace_matrix = np.concatenate((response_cycles_arr, prefetch_requests), axis=1)
        self.trace_valid = True

        # Set active buffer contents: the active window is followed by the prefetch window
        active_buf_start_line_id = 0
        active_buf_end_line_id = self.num_active_buf_lines
        self.active_buffer_set_limits = [active_buf_start_line_id, active_buf_end_line_id]

        prefetch_buf_start_line_id = active_buf_end_line_id
        prefetch_buf_end_line_id = prefetch_buf_start_line_id + self.num_prefetch_buf_lines
        self.prefetch_buffer_set_limits = [prefetch_buf_start_line_id, prefetch_buf_end_line_id]

        self.active_buf_full_flag = True

        # Set the line to be prefetched next
        if last_line_is_partial:
            # The tail of the last requested line is still pending, so resume on that line
            self.next_line_prefetch_idx = num_lines - 1
        else:
            # The modulo operator is to ensure that the indices wrap around
            self.next_line_prefetch_idx = num_lines % num_rows

    def new_prefetch(self):
        """
        Method to do a new prefetch. In a new prefetch, some portion of the original data needs
        to be deleted to accommodate the prefetched data. In this case we overwrite some data in
        the active buffer with the prefetched data and then create a new prefetch request.

        The completion cycle of the request is stored in ``last_prefect_cycle``.

        :return: None
        """
        assert self.active_buf_full_flag, 'Active buffer is empty'

        # 1. Rewrite the active buffer: move both windows ahead by the prefetch window size
        previous_active_start, _ = self.active_buffer_set_limits
        active_start = int((previous_active_start + self.num_prefetch_buf_lines) % self.num_lines)
        active_end = int((active_start + self.num_active_buf_lines) % self.num_lines)

        prefetch_start = active_end
        prefetch_end = int((prefetch_start + self.num_prefetch_buf_lines) % self.num_lines)

        self.active_buffer_set_limits = [active_start, active_end]
        self.prefetch_buffer_set_limits = [prefetch_start, prefetch_end]

        # 2. Create the request
        bandwidth = self.req_gen_bandwidth
        num_rows = self.fetch_matrix.shape[0]

        # Never request more lines than the fetch matrix has, otherwise the request and the
        # cycles array would end up with a different number of rows
        num_lines = min(math.ceil(self.prefetch_buf_size / bandwidth), num_rows)
        if num_lines == 0:
            # Nothing can be prefetched (empty prefetch buffer or empty fetch matrix)
            return

        start_idx = self.next_line_prefetch_idx
        start_col = self.next_col_prefetch_idx

        requested_data_size = num_lines * bandwidth
        self.num_access += requested_data_size

        # The modulo makes the request circle back to the first line when needed. Indexing
        # with an index array returns a copy, so the fetch matrix itself is left untouched
        line_ids = (start_idx + np.arange(num_lines)) % num_rows
        prefetch_requests = self.fetch_matrix[line_ids, :]

        # Modify the prefetch request to drop unwanted addresses
        # a. Chomp the elements in the first line included in previous fetches
        prefetch_requests[0, :start_col] = -1

        # b. Chomp the excess elements in the last line
        # Positions are counted from the beginning of the first requested line
        wanted_end = start_col + self.prefetch_buf_size
        if wanted_end < requested_data_size:
            valid_cols = wanted_end - (num_lines - 1) * bandwidth
            prefetch_requests[-1, valid_cols:] = -1
            # The tail of the last line is still pending, so the next prefetch resumes there
            next_line_idx = (start_idx + num_lines - 1) % num_rows
            next_col_idx = valid_cols
        else:
            next_line_idx = (start_idx + num_lines) % num_rows
            next_col_idx = 0

        # 3. Create the request cycles: one request per cycle, right after the previous prefetch
        cycles_arr = np.arange(num_lines, dtype=float).reshape((num_lines, 1)) + (self.last_prefect_cycle + 1)

        # 4. Send the request
        response_cycles_arr = self.backing_buffer.service_reads(incoming_cycles_arr=cycles_arr,
                                                                incoming_requests_arr_np=prefetch_requests)
        assert response_cycles_arr.shape == cycles_arr.shape, 'The request and response cycles dims do not match'

        # 5. Update the variables
        self.last_prefect_cycle = response_cycles_arr[-1][0]

        this_prefetch_trace = np.concatenate((response_cycles_arr, prefetch_requests), axis=1)
        self.trace_matrix = np.concatenate((self.trace_matrix, this_prefetch_trace), axis=0)

        # Set the position to be prefetched next
        self.next_line_prefetch_idx = int(next_line_idx)
        self.next_col_prefetch_idx = int(next_col_idx)

    def get_trace_matrix(self):
        """
        Method to get the read buffer trace matrix. Each row is one prefetch request sent to the
        backing buffer: the first column is the cycle returned by the backing buffer and the
        remaining columns are the requested addresses (-1 for unused slots).

        :return: Read buffer trace matrix, or None if no trace has been generated yet
        """
        if self.trace_valid:
            return self.trace_matrix

        print('No trace has been generated yet')
        return None

    def get_hit_latency(self):
        """
        Method to get hit latency of the read buffer.

        :return: Hit latency of the read buffer
        """
        return self.hit_latency

    def get_latency(self):
        """
        Method to get the latency of the read buffer. It returns the hit latency, same as
        get_hit_latency().

        :return: Hit latency of the read buffer
        """
        return self.hit_latency

    def get_num_accesses(self):
        """
        Method to get number of accesses of the read buffer if trace_valid flag is set.
        Every prefetch counts its full request size (lines x bandwidth).

        :return: Number of accesses of the read buffer
        """
        assert self.trace_valid, 'Traces not ready yet'
        return self.num_access

    def get_external_access_start_stop_cycles(self):
        """
        Method to get start and stop cycles of the read buffer if trace_valid flag is set.

        :return: Cycle (first column) of the first and of the last row of the trace matrix
        """
        assert self.trace_valid, 'Traces not ready yet'
        start_cycle = self.trace_matrix[0][0]
        end_cycle = self.trace_matrix[-1][0]

        return start_cycle, end_cycle

    def print_trace(self, filename):
        """
        Method to write the read buffer trace matrix to a file.

        :param filename: Name of the trace file

        :return: None
        """
        if not self.trace_valid:
            print('No trace has been generated yet')
            return

        np.savetxt(filename, self.trace_matrix, fmt='%s', delimiter=",")