this is the function i\'m about to write void transpose_submit(size_t M, size_t
ID: 3811362 • Letter: T
Question
this is the function i'm about to write
void transpose_submit(size_t M, size_t N, double A[N][M], double B[M][N], double *tmp){}
Performance (26 pts) For each matrix size, the performance of your transpose submit function is evaluated by using LLVM-based instrumentation to extract the address trace for your function, and then using the reference simulator to replay this trace on a cache with parameters s 5, E 1, b 6). Using the reference cache simulator, each transpose function will be assigned some number of clock cycles m A cache miss is worth 100 clock cycles, while a cache hit is worth 4. Your performance score for each matrix size will scale linearly with m up to some threshold. The scores are computed as: 32 x 32 10 points if m 35,000 0 points if m 45,000 64 x 64 10 points if m 150,000,0 points if m 200,000 63 x 65 6 points if m 280,000, 0 points if m 350,000 For example, a solution for the 32 x 32 matrix with 1764 hits and 284 misses (m 1764 x 4 284 x 100 35456) would receive 9.5 of the possible 10 points You can optimize your code specifically for the three cases in the performance evaluation. In particular, it is perfectly OK for your function to explicitly check for the matrix sizes and implement separate code optimized for each case.Explanation / Answer
/*
*
* Each transpose function must have a prototype of the form:
* void trans(size_t M, size_t N, double A[N][M], double B[M][N], double *tmp);
* A is the source matrix, B is the destination
* tmp points to a region of memory able to hold TMPCOUNT (set to 256) doubles as temporaries
*
* A transpose function is evaluated by counting the number of misses
* on a 2KB direct mapped cache with a block size of 64 bytes.
*
*
*/
#include <stdio.h>
#include <stdbool.h>
/* Forward declarations */
bool is_transpose(size_t M, size_t N, double A[N][M], double B[M][N]);
void trans(size_t M, size_t N, double A[N][M], double B[M][N], double *tmp);
void trans_tmp(size_t M, size_t N, double A[N][M], double B[M][N], double *tmp);
void trans_32_block(size_t M, size_t N, double A[N][M], double B[M][N], double *tmp);
void trans_64_block(size_t M, size_t N, double A[N][M], double B[M][N], double *tmp);
void trans_63_x_65(size_t M, size_t N, double A[N][M], double B[M][N], double *tmp);
/*
* transpose_submit - This is the solution transpose function that you
* will be graded on for Part B of the assignment. Do not change
* the description string "Transpose submission", as the driver
* searches for that string to identify the transpose function to
* be graded.
*/
char transpose_submit_desc[] = "Transpose submission";
void transpose_submit(size_t M, size_t N, double A[N][M], double B[M][N], double *tmp)
{
/*
* This is a good place to call your favorite transposition functions
* It's OK to choose different functions based on array size, but
* your code must be correct for all values of M and N
*/
if (M == 32 && N == 32){
trans_32_block(M, N, A, B, tmp);
} else if (M == 64 && N == 64){
trans_64_block(M, N, A, B, tmp);
} else{
trans_63_x_65(M, N, A, B, tmp);
}
}
/*
* You can define additional transpose functions below. We've defined
* some simple ones below to help you get started.
*/
char trans_32_block_desc[] = "32x32 square blocking";
//using blocking, as recommended by handout and proper loop ordering
void trans_32_block(size_t M, size_t N, double A[N][M], double B[M][N], double *tmp){
int r, c, rBlock, cBlock;
int diag = 0;
int blockSize = 8; //found thru trial-and-error
for (cBlock = 0; cBlock < M; cBlock = cBlock + blockSize){
for (rBlock = 0; rBlock < N; rBlock = rBlock + blockSize){
for (r = rBlock; r < rBlock +blockSize; r++){
for (c = cBlock; c < cBlock + blockSize; c++){
if (c != r){
B[c][r] = A[r][c];
} else{ //(c == r)
tmp[0] = A[r][c]; //diagonal case
diag = c; //could be c or r
}
}
//diagonals on square matrix can transpose nicely
if (cBlock == rBlock) B[diag][diag] = tmp[0];
}
}
}
ENSURES(is_transpose(M, N, A, B));
}
char trans_64_block_desc[] = "64x64 square blocking";
//using same principle as 32x32, but must adjust blockSize
void trans_64_block(size_t M, size_t N, double A[N][M], double B[M][N], double *tmp){
int r, c, rBlock, cBlock;
int diag = 0;
int blockSize = 4; //found thru trial-and-error
for (cBlock = 0; cBlock < M; cBlock = cBlock + blockSize){
for (rBlock = 0; rBlock < N; rBlock = rBlock + blockSize){
for (r = rBlock; r < rBlock +blockSize; r++){
for (c = cBlock; c < cBlock + blockSize; c++){
if (c != r){
B[c][r] = A[r][c];
} else{ //(c == r)
tmp[0] = A[r][c]; //diagonal case
diag = c; //could be c or r
}
}
//diagonals on square matrix can transpose nicely
if (cBlock == rBlock) B[diag][diag] = tmp[0];
}
}
}
ENSURES(is_transpose(M, N, A, B));
}
char trans_63_x_65_desc[] = "63x65 odd square blocking";
//using same principle as 64x64, but must watch boundaries
void trans_63_x_65(size_t M, size_t N, double A[N][M], double B[M][N], double *tmp){
int r, c, rBlock, cBlock;
int diag = 0;
int blockSize = 4; //similar to 64x64, with exceptions
for (cBlock = 0; cBlock < M; cBlock = cBlock + blockSize){
for (rBlock = 0; rBlock < N; rBlock = rBlock + blockSize){
for (r = rBlock; (r < rBlock+blockSize) && r < N ; r++){
for (c = cBlock; (c < cBlock+blockSize) && c < M; c++){
if (c != r){
B[c][r] = A[r][c];
} else{ //(c == r)
tmp[0] = A[r][c]; //diagonal case
diag = c; //could be c or r
}
}
//diagonals on square matrix can transpose nicely
if (cBlock == rBlock) B[diag][diag] = tmp[0];
}
}
}
ENSURES(is_transpose(M, N, A, B));
}
/*
* trans - A simple baseline transpose function, not optimized for the cache.
*/
char trans_desc[] = "Simple row-wise scan transpose";
/*
* The following shows an example of a correct, but cache-inefficient
* transpose function. Note the use of macros (defined in
* contracts.h) that add checking code when the file is compiled in
* debugging mode. See the Makefile for instructions on how to do
* this.
*
* IMPORTANT: Enabling debugging will significantly reduce your
* cache performance. Be sure to disable this checking when you
* want to measure performance.
*/
void trans(size_t M, size_t N, double A[N][M], double B[M][N], double *tmp)
{
size_t i, j;
REQUIRES(M > 0);
REQUIRES(N > 0);
for (i = 0; i < N; i++) {
for (j = 0; j < M; j++) {
B[j][i] = A[i][j];
}
}
ENSURES(is_transpose(M, N, A, B));
}
/*
* This is a contrived example to illustrate the use of the temporary array
*/
char trans_tmp_desc[] =
"Simple row-wise scan transpose, using a 2X2 temporary array";
void trans_tmp(size_t M, size_t N, double A[N][M], double B[M][N], double *tmp)
{
size_t i, j;
/* Use the first four elements of tmp as a 2x2 array with row-major ordering. */
REQUIRES(M > 0);
REQUIRES(N > 0);
for (i = 0; i < N; i++) {
for (j = 0; j < M; j++) {
int di = i%2;
int dj = j%2;
tmp[2*di+dj] = A[i][j];
B[j][i] = tmp[2*di+dj];
}
}
ENSURES(is_transpose(M, N, A, B));
}
/*
* registerFunctions - This function registers your transpose
* functions with the driver. At runtime, the driver will
* evaluate each of the registered functions and summarize their
* performance. This is a handy way to experiment with different
* transpose strategies.
*/
void registerFunctions()
{
/* Register your solution function */
registerTransFunction(transpose_submit, transpose_submit_desc);
/* Register any additional transpose functions */
//registerTransFunction(trans, trans_desc);
//registerTransFunction(trans_tmp, trans_tmp_desc);
}
/*
* is_transpose - This helper function checks if B is the transpose of
* A. You can check the correctness of your transpose by calling
* it before returning from the transpose function.
*/
bool is_transpose(size_t M, size_t N, double A[N][M], double B[M][N])
{
size_t i, j;
for (i = 0; i < N; i++) {
for (j = 0; j < M; ++j) {
if (A[i][j] != B[j][i]) {
return false;
}
}
}
return true;
}
Related Questions
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.