Source code for scalesim.memory.read_buffer

# Double buffer read memory implementation
# TODO: Verification Pending
import math
import numpy as np
from tqdm import tqdm

from scalesim.memory.read_port import read_port



[docs]
class read_buffer:
    """
    Class which runs the memory simulation of double buffered ifmap/filter SRAM.
    The double buffering helps to hide the DRAM latency when the SRAM is servicing requests from the systolic array \
    using one of the buffers while the other buffer prefetches from the DRAM.
    """
    def __init__(self):
        """
        Create a read buffer populated with default parameters.

        The defaults describe a 128 byte buffer with 1 byte words, 90% of which is the
        active buffer. Use set_params() to configure the buffer for a real run.
        """
        # User-facing knobs (overwritten by set_params)
        self.total_size_bytes = 128
        self.word_size = 1                      # Bytes
        self.active_buf_frac = 0.9
        self.hit_latency = 1                    # Cycles to serve a request that is already in the buffer

        # Sizes derived from the knobs above, expressed in elements (words)
        self.total_size_elems = math.floor(self.total_size_bytes / self.word_size)
        self.active_buf_size = int(math.ceil(self.total_size_elems * self.active_buf_frac))
        self.prefetch_buf_size = self.total_size_elems - self.active_buf_size

        # Interface to the next level of memory
        self.backing_buffer = read_port()
        self.req_gen_bandwidth = 100            # words per cycle

        # Book-keeping for the lines (sets of addresses) held by the buffer
        self.hashed_buffer = {}
        self.num_lines = 0
        self.num_active_buf_lines = self.num_prefetch_buf_lines = 1
        self.active_buffer_set_limits = []
        self.prefetch_buffer_set_limits = []

        # Prefetch state
        self.fetch_matrix = np.ones((1, 1))
        self.last_prefect_cycle = -1
        self.next_line_prefetch_idx = self.next_col_prefetch_idx = 0

        # Number of words requested from the backing buffer so far
        self.num_access = 0

        # Trace of the requests sent to the backing buffer
        self.trace_matrix = np.ones((1, 1))

        # Status flags: nothing is ready until the corresponding step has run
        self.active_buf_full_flag = self.hashed_buffer_valid = self.trace_valid = False

    #

[docs]
    def set_params(self, backing_buf_obj,
                   total_size_bytes=1, word_size=1, active_buf_frac=0.9,
                   hit_latency=1, backing_buf_bw=1
                   ):
        """
        Method to set the ifmap/filter double buffered memory simulation parameters for housekeeping.

        :param backing_buf_obj: Backing buffer object, by default is read_port
        :param total_size_bytes: Read buffer (SRAM) total size in bytes
        :param word_size: The word size of individual elements
        :param active_buf_frac: The active fraction of the double duffered ifmap/filter memory (serving the systolic array memory requests)
        :param hit_latency: Hit latency of the double duffered ifmap/filter memory
        :param backing_buf_bw: Bandwidth of the backing buffer for ifmap SRAM. The default backing buffer is a dummy one (read port).

        :return: None
        """
        self.total_size_bytes = total_size_bytes
        self.word_size = word_size

        assert 0.5 <= active_buf_frac < 1, "Valid active buf frac [0.5,1)"
        self.active_buf_frac = round(active_buf_frac, 2)
        self.hit_latency = hit_latency

        self.backing_buffer = backing_buf_obj
        self.req_gen_bandwidth = backing_buf_bw

        # Calculate these based on the values provided
        self.total_size_elems = math.floor(self.total_size_bytes / self.word_size)
        self.active_buf_size = int(math.ceil(self.total_size_elems * self.active_buf_frac))
        self.prefetch_buf_size = self.total_size_elems - self.active_buf_size


    #

[docs]
    def reset(self):
        """
        Method to reset the read buffer to the same state the constructor produces.

        Every attribute assigned in __init__ is re-assigned here, including the line counters
        (num_lines, num_active_buf_lines, num_prefetch_buf_lines), so no state from a previous
        run survives. The backing buffer is replaced by a fresh read_port object.

        :return: None
        """
        # Buffer properties: User specified
        self.total_size_bytes = 128
        self.word_size = 1  # Bytes
        self.active_buf_frac = 0.9
        self.hit_latency = 1  # Cycles after which a request is served if already in the buffer

        # Buffer properties: Calculated
        self.total_size_elems = math.floor(self.total_size_bytes / self.word_size)
        self.active_buf_size = int(math.ceil(self.total_size_elems * self.active_buf_frac))
        self.prefetch_buf_size = self.total_size_elems - self.active_buf_size

        # Backing interface properties
        self.backing_buffer = read_port()
        self.req_gen_bandwidth = 100  # words per cycle

        # Status of the buffer
        self.hashed_buffer = dict()
        # These three counters were previously left untouched by reset()
        self.num_lines = 0
        self.num_active_buf_lines = 1
        self.num_prefetch_buf_lines = 1
        self.active_buffer_set_limits = []
        self.prefetch_buffer_set_limits = []

        # Variables to enable prefetching
        self.fetch_matrix = np.ones((1, 1))
        self.last_prefect_cycle = -1
        self.next_line_prefetch_idx = 0
        self.next_col_prefetch_idx = 0

        # Access counts
        self.num_access = 0

        # Trace matrix
        self.trace_matrix = np.ones((1, 1))

        # Flags
        self.active_buf_full_flag = False
        self.hashed_buffer_valid = False
        self.trace_valid = False


    #

[docs]
    def set_fetch_matrix(self, fetch_matrix_np):
        """
        Method to set the fetch matrix responsible for prefetching from the DRAM 

        :return: None
        """
        # The operand matrix determines what to pre-fetch into both active and prefetch buffers
        # In 'user' mode, this will be set in the set_params

        num_elems = fetch_matrix_np.shape[0] * fetch_matrix_np.shape[1]
        num_lines = int(math.ceil(num_elems / self.req_gen_bandwidth))
        self.fetch_matrix = np.ones((num_lines, self.req_gen_bandwidth)) * -1

        # Put stuff into the fetch matrix
        # This is done to ensure that there is no shape mismatch
        # Not sure if this is the optimal way to do it or not
        for i in range(num_elems):
            src_row = math.floor(i / fetch_matrix_np.shape[1])
            src_col = math.floor(i % fetch_matrix_np.shape[1])

            dest_row = math.floor(i / self.req_gen_bandwidth)
            dest_col = math.floor(i % self.req_gen_bandwidth)

            self.fetch_matrix[dest_row][dest_col] = fetch_matrix_np[src_row][src_col]

        # Once the fetch matrices are set, populate the data structure for fast lookups and servicing
        self.prepare_hashed_buffer()


    #

[docs]
    def prepare_hashed_buffer(self):
        """
        Method to convert the fetch matrix into a hashed buffer for fast lookups.

        :return: None
        """
        elems_per_set = math.ceil(self.total_size_elems / 100)

        prefetch_rows = self.fetch_matrix.shape[0]
        prefetch_cols = self.fetch_matrix.shape[1]

        line_id = 0
        elem_ctr = 0
        current_line = set()

        for r in range(prefetch_rows):
            for c in range(prefetch_cols):
                elem = self.fetch_matrix[r][c]

                if not elem == -1:
                    current_line.add(elem)
                    elem_ctr += 1

                if not elem_ctr < elems_per_set:    # ie > or =
                    self.hashed_buffer[line_id] = current_line
                    line_id += 1
                    elem_ctr = 0
                    current_line = set()        # new set

        self.hashed_buffer[line_id] = current_line

        max_num_active_buf_lines = int(math.ceil(self.active_buf_size / elems_per_set))
        max_num_prefetch_buf_lines = int(math.ceil(self.prefetch_buf_size / elems_per_set))
        num_lines = line_id + 1

        if num_lines > max_num_active_buf_lines:
            self.num_active_buf_lines = max_num_active_buf_lines
        else:
            self.num_active_buf_lines = num_lines

        remaining_lines = num_lines - self.num_active_buf_lines

        if remaining_lines > max_num_prefetch_buf_lines:
            self.num_prefetch_buf_lines = max_num_prefetch_buf_lines
        else:
            self.num_prefetch_buf_lines = remaining_lines

        self.num_lines = num_lines
        self.hashed_buffer_valid = True


    #

[docs]
    def active_buffer_hit(self, addr):
        """
        Method to check if the address is hit or miss in the active read buffer.

        :param addr: Address of the incoming memory request

        :return: True if address is hit and false if miss
        """
        assert self.active_buf_full_flag, 'Active buffer is not ready yet'

        start_id, end_id = self.active_buffer_set_limits
        if start_id < end_id:
            for line_id in range(start_id, end_id):
                this_set = self.hashed_buffer[line_id]      # O(1) --> accessing hash
                if addr in this_set:                        # Checking in a set(), O(1) lookup
                    return True

        else:
            for line_id in range(start_id, self.num_lines):
                this_set = self.hashed_buffer[line_id]  # O(1) --> accessing hash
                if addr in this_set:  # Checking in a set(), O(1) lookup
                    return True

            for line_id in range(end_id):
                this_set = self.hashed_buffer[line_id]  # O(1) --> accessing hash
                if addr in this_set:  # Checking in a set(), O(1) lookup
                    return True
        # Fixing for ISSUE #14
        # return True
        return False


    #

[docs]
    def service_reads(self, incoming_requests_arr_np,   # 2D array with the requests
                            incoming_cycles_arr):       # 1D vector with the cycles at which req arrived
        """
        Method to service read requests coming from systolic array.
        Logic: Always check if an addr is in active buffer.
        If hit, return with hit latency
        Else, make the contents of prefetch buffer as active and then check
        Continue making new prefetches until there is a hit

        :param incoming_requests_arr_np: matrix containg address of the memory requsts made from systolic array
        :param incoming_cycles_arr: list containg cycles at which the memory requsts are made from systolic array

        :return: A list of out cycles when the requests are serviced
        """
        # Service the incoming read requests
        # returns a cycles array corresponding to the requests buffer
        # Logic: Always check if an addr is in active buffer.
        #        If hit, return with hit latency
        #        Else, make the contents of prefetch buffer as active and then check
        #              finish till an ongoing prefetch is done before reassiging prefetch buffer

        if not self.active_buf_full_flag:
            start_cycle = incoming_cycles_arr[0][0]
            self.prefetch_active_buffer(start_cycle=start_cycle)    # Needs to use the entire operand matrix
                                                                    # keeping in mind the tile order and everything

        out_cycles_arr = []
        offset = self.hit_latency
        # for cycle, request_line in tqdm(zip(incoming_cycles_arr, incoming_requests_arr_np)):
        for i in tqdm(range(incoming_requests_arr_np.shape[0]), disable=True):
            cycle = incoming_cycles_arr[i]
            # Fixing for ISSUE #14
            # request_line = set(incoming_requests_arr_np[i]) #shaves off a few seconds
            request_line = incoming_requests_arr_np[i]

            for addr in request_line:
                if addr == -1:
                    continue

                # if addr not in self.active_buffer_contents: #this is super slow!!!
                # Fixing for ISSUE #14
                # if not self.active_buffer_hit(addr):  # --> While loop ensures multiple prefetches if needed
                while not self.active_buffer_hit(addr):
                    self.new_prefetch()
                    potential_stall_cycles = self.last_prefect_cycle - (cycle + offset)
                    offset += potential_stall_cycles        # Offset increments if there were potential stalls

            out_cycles = cycle + offset
            out_cycles_arr.append(out_cycles)

        out_cycles_arr_np = np.asarray(out_cycles_arr).reshape((len(out_cycles_arr), 1))

        return out_cycles_arr_np


    #

[docs]
    def prefetch_active_buffer(self, start_cycle):
        """
        Method to prefetch the active read buffer before servicing individual memory requests

        :return: None
        """
        # Depending on size of the active buffer, calculate the number of lines from op mat to fetch
        # Also, calculate the cycles arr for requests

        # 1. Preparing the requests:
        num_lines = math.ceil(self.active_buf_size / self.req_gen_bandwidth)
        if not num_lines < self.fetch_matrix.shape[0]:
            num_lines = self.fetch_matrix.shape[0]

        requested_data_size = num_lines * self.req_gen_bandwidth
        self.num_access += requested_data_size

        start_idx = 0
        end_idx = num_lines

        prefetch_requests = self.fetch_matrix[start_idx:end_idx, :]

        # 1.1 See if extra requests are made, if so nullify them
        self.next_col_prefetch_idx = 0
        if requested_data_size > self.active_buf_size:
            valid_cols = int(self.active_buf_size % self.req_gen_bandwidth)
            row = end_idx - 1
            self.next_col_prefetch_idx = valid_cols
            for col in range(valid_cols, self.req_gen_bandwidth):
                prefetch_requests[row][col] = -1

        # TODO: Tally and check if this agrees with the contents of the hashed buffer

        # 2. Preparing the cycles array
        #    The start_cycle variable ensures that all the requests have been made before any incoming reads came
        cycles_arr = np.zeros((num_lines, 1))
        for i in range(cycles_arr.shape[0]):
            cycles_arr[i][0] = -1 * (num_lines - start_cycle - (i - self.backing_buffer.get_latency()))

        # 3. Send the request and get the response cycles count
        response_cycles_arr = self.backing_buffer.service_reads(incoming_cycles_arr=cycles_arr,
                                                                incoming_requests_arr_np=prefetch_requests)

        # 4. Update the variables
        self.last_prefect_cycle = int(response_cycles_arr[-1][0])

        # Update the trace matrix
        self.trace_matrix = np.concatenate((response_cycles_arr, prefetch_requests), axis=1)
        self.trace_valid = True

        # Set active buffer contents
        active_buf_start_line_id = 0
        active_buf_end_line_id = self.num_active_buf_lines
        self.active_buffer_set_limits = [active_buf_start_line_id, active_buf_end_line_id]

        prefetch_buf_start_line_id = active_buf_end_line_id
        prefetch_buf_end_line_id = prefetch_buf_start_line_id + self.num_prefetch_buf_lines
        self.prefetch_buffer_set_limits = [prefetch_buf_start_line_id, prefetch_buf_end_line_id]

        self.active_buf_full_flag = True

        # Set the line to be prefetched next
        # The module operator is to ensure that the indices wrap around
        if requested_data_size > self.active_buf_size:  # Some elements in the current idx is left out in this case
            self.next_line_prefetch_idx = num_lines % self.fetch_matrix.shape[0]
        else:
            self.next_line_prefetch_idx = (num_lines + 1) % self.fetch_matrix.shape[0]


    #

[docs]
    def new_prefetch(self):
        """
        Method to do a new prefetch. In a new prefetch, some portion of the original data needs to be  \
        deleted to accomodate the prefetched data
        In this case we overwrite some data in the active buffer with the prefetched data \
        and then create a new prefetch request

        :return: None
        """
        # In a new prefetch, some portion of the original data needs to be deleted to accomodate the prefetched data
        # In this case we overwrite some data in the active buffer with the prefetched data
        # And then create a new prefetch request
        # Also return when the prefetched data was made available

        # 1. Rewrite the active buffer
        assert self.active_buf_full_flag, 'Active buffer is empty'
        active_start, active_end = self.active_buffer_set_limits

        active_start = int((active_start + self.num_prefetch_buf_lines) % self.num_lines)
        active_end = int((active_start + self.num_active_buf_lines) % self.num_lines)
        prefetch_start = active_end
        prefetch_end = int((prefetch_start + self.num_prefetch_buf_lines) % self.num_lines)

        self.active_buffer_set_limits = [active_start, active_end]
        self.prefetch_buffer_set_limits = [prefetch_start, prefetch_end]

        # 2. Create the request
        start_idx = self.next_line_prefetch_idx
        num_lines = math.ceil(self.prefetch_buf_size / self.req_gen_bandwidth)
        end_idx = start_idx + num_lines
        requested_data_size = num_lines * self.req_gen_bandwidth
        self.num_access += requested_data_size

        # In case we need to circle back
        if end_idx > self.fetch_matrix.shape[0]:
            last_idx = self.fetch_matrix.shape[0]
            prefetch_requests = self.fetch_matrix[start_idx:,:]

            new_end_idx = min(end_idx - last_idx, start_idx)    # In case the entire array is engulfed
            prefetch_requests = np.concatenate((prefetch_requests, self.fetch_matrix[:new_end_idx,:]))
        else:
            prefetch_requests = self.fetch_matrix[start_idx:end_idx, :]

        # Modify the prefetch request to drop unwanted addresses
        # a. Chomp the elements in the first line included in previous fetches
        for i in range(0, self.next_col_prefetch_idx):
            prefetch_requests[0][i] = -1

        # b. Chomp the excess elements in the last line
        if requested_data_size > self.active_buf_size:
            valid_cols = int(self.active_buf_size % self.req_gen_bandwidth)
            row = prefetch_requests.shape[0] - 1
            for col in range(valid_cols, self.req_gen_bandwidth):
                prefetch_requests[row][col] = -1

        # 3. Create the request cycles
        cycles_arr = np.zeros((num_lines, 1))
        for i in range(cycles_arr.shape[0]):
            # Fixing ISSUE #14
            # cycles_arr[i][0] = self.last_prefect_cycle + i
            cycles_arr[i][0] = self.last_prefect_cycle + i + 1

        # 4. Send the request
        response_cycles_arr = self.backing_buffer.service_reads(incoming_cycles_arr=cycles_arr,
                                                                incoming_requests_arr_np=prefetch_requests)

        # 5. Update the variables
        self.last_prefect_cycle = response_cycles_arr[-1][0]

        assert response_cycles_arr.shape == cycles_arr.shape, 'The request and response cycles dims do not match'

        this_prefetch_trace = np.concatenate((response_cycles_arr, prefetch_requests), axis=1)
        self.trace_matrix = np.concatenate((self.trace_matrix, this_prefetch_trace), axis=0)

        # Set the line to be prefetched next
        if requested_data_size > self.active_buf_size:
            self.next_line_prefetch_idx = num_lines % self.fetch_matrix.shape[0]
        else:
            self.next_line_prefetch_idx = (num_lines + 1) % self.fetch_matrix.shape[1]


        # This does not need to return anything

    #

[docs]
    def get_trace_matrix(self):
        """
        Method to get the read buffer trace matrix. It contains addresses requsted by the systolic array and \
        the cycles (first column) at which the requests are made.

        :return: Read buffer trace matrix
        """
        if not self.trace_valid:
            print('No trace has been generated yet')
            return

        return self.trace_matrix


    #

[docs]
    def get_hit_latency(self):
        """
        Method to get hit latency of the read buffer.

        :return: Hit latency of the read buffer
        """
        return self.hit_latency


    #

[docs]
    def get_latency(self):
        """
        Method to get hit latency of the read buffer.

        :return: Hit latency of the read buffer
        """
        return self.hit_latency


    #

[docs]
    def get_num_accesses(self):
        """
        Method to get number of accesses of the read buffer if trace_valid flag is set.

        :return: Number of accesses of the read buffer
        """
        assert self.trace_valid, 'Traces not ready yet'
        return self.num_access


    #

[docs]
    def get_external_access_start_stop_cycles(self):
        """
        Method to get start and stop cycles of the read buffer if trace_valid flag is set.

        :return: Start and stop cycles of the read buffer
        """
        assert self.trace_valid, 'Traces not ready yet'
        start_cycle = self.trace_matrix[0][0]
        end_cycle = self.trace_matrix[-1][0]

        return start_cycle, end_cycle


    #

[docs]
    def print_trace(self, filename):
        """
        Method to write the read buffer trace matrix to a file.

        :param filename: Name of the trace file 

        :return: None
        """
        if not self.trace_valid:
            print('No trace has been generated yet')
            return

        np.savetxt(filename, self.trace_matrix, fmt='%s', delimiter=",")