[施工完成] CSAPP Cachelab
背景
CSAPP:3e 的配套实验 地址 分成了两个部分,第一部分是模拟一下cache的miss,hit,evict的规则。第二部分是优化一个矩阵的转置,使得miss尽可能少。
PART A
给了一个标程csim-ref,要求实现一个程序,输出与该标程一致。
写的时候太心急了。。导致没看完作业要求就开始写了。。 然后就纠结了好久。。如果要访问的地址超过了一个block line的边界该怎么办。。
然而实际上题目里已经把这种情况排除了。。
For this this lab, you should assume that memory accesses are aligned properly, such that a single memory access never crosses block boundaries. By making this assumption, you can ignore the request sizes in the valgrind traces
这样的话。。就没什么难度了。。 LRU的替换策略用纯c去撸hashmap+list有点烦。。。干脆就写了暴力的实现。。反正是模拟题(x
1
2#include <getopt.h>
3#include <stdbool.h>
4#include <stdio.h>
5#include <stdlib.h>
6#include <string.h>
7#include <unistd.h>
8
9#include "cachelab.h"
10
11const unsigned int address_space_size = 64;
12// https://stackoverflow.com/questions/9642732/parsing-command-line-arguments-in-c
13
14typedef struct {
15 bool verbose;
16 int set_index_bits;
17 int lines_per_set;
18 int block_bits;
19 char *tracefile;
20
21} Arg;
22
23void ParseArgs(int argc, char **argv, Arg *arg) {
24 int opt;
25 while ((opt = getopt(argc, argv, "hvs:E:b:t:")) != -1) {
26 switch (opt) {
27 case 'h':
28 printf("Usage: ./csim-ref [-hv] -s <s> -E <E> -b <b> -t <tracefile>\n");
29 break;
30 case 'v':
31 arg->verbose = true;
32 break;
33 case 's':
34 arg->set_index_bits = atoi(optarg);
35 break;
36 case 'E':
37 arg->lines_per_set = atoi(optarg);
38 break;
39 case 'b':
40 arg->block_bits = atoi(optarg);
41 break;
42 case 't':
43 arg->tracefile = optarg;
44 break;
45 default:
46 abort();
47 }
48 }
49}
50
51typedef unsigned long long ULL;
52// 注意: 地址是十六进制表示。。。debug 到去世。。
53typedef struct {
54 ULL tag_bits;
55 ULL index_bits;
56 ULL offset_bits;
57 ULL block_number;
58
59} AddressBits;
60// 64 bit address,m=64
61// tag bit t = m-(b+s)
62// parse 64 address bits into tag bits,index bits and offset bits
63void ParseAddress(const Arg *arg, const ULL raw_addr, AddressBits *addr) {
64 const unsigned int index_and_block_bit_length =
65 arg->block_bits + arg->set_index_bits;
66 ULL block_bit_mask = (1ULL << arg->block_bits) - 1;
67 ULL index_bit_mask = ((1ULL << arg->set_index_bits) - 1) << arg->block_bits;
68 ULL tag_bit_mask =
69 ((1ULL << (address_space_size - index_and_block_bit_length)) - 1)
70 << index_and_block_bit_length;
71 // printf("index_and_block_bit_length:%d\n",index_and_block_bit_length);
72 // printf("mask:%lld %lld %lld\n", block_bit_mask, index_bit_mask,
73 // tag_bit_mask);
74 addr->tag_bits = (raw_addr & tag_bit_mask) >> index_and_block_bit_length;
75 addr->index_bits = (raw_addr & index_bit_mask) >> arg->block_bits;
76 addr->offset_bits = raw_addr & block_bit_mask;
77 addr->block_number = raw_addr / (1ULL << arg->block_bits);
78 // printf("addr:%llx tag:%lld index:%lld osset:%lld block:%lld \n", raw_addr,
79 // addr->tag_bits, addr->index_bits, addr->offset_bits,
80 // addr->block_number);
81}
82void TestParseAddress(const Arg *arg) {
83 for (int addr = 0; addr < (1ULL << address_space_size); addr++) {
84 AddressBits tmp_addr;
85 ParseAddress(arg, addr, &tmp_addr);
86 }
87}
88typedef struct {
89 enum operation { instruction_load, data_load, data_store, data_modify } op;
90 ULL address;
91 int size;
92} Trace;
93#define MAX_NUM_OF_TRACE 300000
94Trace trace[MAX_NUM_OF_TRACE];
95int lines_in_trace_file = 0;
96void ParseTraceFile(const char *file) {
97 FILE *fptr = fopen(file, "r");
98 if (fptr == NULL) {
99 printf("Error!");
100 exit(1);
101 }
102
103 char op[5];
104 ULL addr;
105 int cnt = 0, siz;
106 while (fscanf(fptr, "%s %llx,%d", op, &addr, &siz) != EOF) {
107 // printf("op:%s addr:%lld size:%d\n", op, addr, siz);
108 trace[cnt].address = addr;
109 trace[cnt].size = siz;
110 if (op[0] == 'L') {
111 trace[cnt].op = data_load;
112 } else if (op[0] == 'I') {
113 trace[cnt].op = instruction_load;
114 } else if (op[0] == 'S') {
115 trace[cnt].op = data_store;
116 } else if (op[0] == 'M') {
117 trace[cnt].op = data_modify;
118 }
119 cnt++;
120 }
121 lines_in_trace_file = cnt;
122
123 fclose(fptr);
124}
125
126void TestParseTraceFile() {
127 char *file = "direct-mapped-4-bit.trace";
128 ParseTraceFile(file);
129}
130
131// 不需要真的做cache,只需要模拟hit,miss,evition 的次数即可
132
133typedef struct {
134 unsigned int valid;
135 unsigned int tag;
136 // used for LRU policy
137 // use line number of trace file as age
138 // the smallest age is the least recent used line
139 unsigned int age;
140} Line;
141
142typedef struct {
143 int num_of_lines;
144 Line *lines;
145} Set;
146
147Set *set;
148struct cache_summary_t {
149 int hits;
150 int misses;
151 int evitions;
152} cache_summary;
153char operation_table[5] = {'I', 'L', 'S', 'M'};
154
155bool CheckCacheLineHit(const AddressBits addr, Set *cur_set, const int index) {
156 bool any_of_line_hit = false;
157 for (int j = 0; j < cur_set->num_of_lines; j++) {
158 Line *cur_line = &cur_set->lines[j];
159 if (cur_line->valid == 1 && cur_line->tag == addr.tag_bits) {
160 // printf("addr.tag_bits:%lld\n", addr.tag_bits);
161 cache_summary.hits++;
162 cur_line->age = index;
163 any_of_line_hit = true;
164 break;
165 }
166 }
167 return any_of_line_hit;
168}
169
170bool CheckCacheLineEmpty(const AddressBits addr, Set *cur_set,
171 const int index) {
172 bool any_of_line_empty = false;
173 for (int j = 0; j < cur_set->num_of_lines; j++) {
174 Line *cur_line = &cur_set->lines[j];
175 if (cur_line->valid == 0) {
176 cache_summary.misses++;
177 cur_line->valid = 1;
178 cur_line->tag = addr.tag_bits;
179 cur_line->age = index;
180
181 any_of_line_empty = true;
182 break;
183 }
184 }
185 return any_of_line_empty;
186}
187
188void NaiveLru(const AddressBits addr, Set *cur_set, const int index) {
189 int evict_line;
190 int min_age = lines_in_trace_file;
191 // printf("set[%lld]->num_of_lines:%d\n",addr.index_bits,cur_set->num_of_lines);
192 for (int j = 0; j < cur_set->num_of_lines; j++) {
193 Line *cur_line = &cur_set->lines[j];
194 // printf("cur_line age:%d\n",cur_line->age);
195 if (cur_line->age < min_age) {
196 min_age = cur_line->age;
197 evict_line = j;
198 }
199 }
200 // printf("evict_line:%d min_age:%d\n",evict_line,min_age);
201 cache_summary.misses++;
202 cache_summary.evitions++;
203 cur_set->lines[evict_line].tag = addr.tag_bits;
204 cur_set->lines[evict_line].age = index;
205}
206
207void EvicteCacheLine(const AddressBits addr, Set *cur_set, const int index) {
208 // I'm too lazy to use Doubly linked list and hashmap
209 NaiveLru(addr, cur_set, index);
210}
211
212void SimulateCache(const Arg *arg) {
213 memset(&cache_summary, 0, sizeof(cache_summary));
214 // instruction_load, data_load, data_store, data_modify
215
216 for (int i = 0; i < lines_in_trace_file; i++) {
217 // ignore instruction load
218 if (trace[i].op == 0) continue;
219 AddressBits addr;
220 ParseAddress(arg, trace[i].address, &addr);
221 Set *cur_set = &set[addr.index_bits];
222 // data load or data store
223 if (trace[i].op <= 2) {
224 bool any_of_line_hit = CheckCacheLineHit(addr, cur_set, i);
225 if (any_of_line_hit) {
226 if (arg->verbose) {
227 printf("%c %lld,%d hit\n", operation_table[trace[i].op],
228 trace[i].address, trace[i].size);
229 }
230 continue;
231 }
232 bool any_of_line_empty = CheckCacheLineEmpty(addr, cur_set, i);
233 if (any_of_line_empty) {
234 if (arg->verbose) {
235 printf("%c %lld,%d miss\n", operation_table[trace[i].op],
236 trace[i].address, trace[i].size);
237 }
238 continue;
239 }
240 EvicteCacheLine(addr, cur_set, i);
241 if (arg->verbose) {
242 printf("%c %lld,%d miss eviction\n", operation_table[trace[i].op],
243 trace[i].address, trace[i].size);
244 }
245 } else {
246 // data modify
247 bool any_of_line_hit_in_data_load = CheckCacheLineHit(addr, cur_set, i);
248 if (any_of_line_hit_in_data_load) {
249 // if 'data load' hit cache, then 'data store' must also hit cache
250 cache_summary.hits++;
251 if (arg->verbose) {
252 printf("%c %lld,%d hit hit \n", operation_table[trace[i].op],
253 trace[i].address, trace[i].size);
254 }
255 continue;
256 }
257 bool any_of_line_empty = CheckCacheLineEmpty(addr, cur_set, i);
258 if (any_of_line_empty) {
259 cache_summary.hits++;
260 if (arg->verbose) {
261 printf("%c %lld,%d miss hit\n", operation_table[trace[i].op],
262 trace[i].address, trace[i].size);
263 }
264 continue;
265 }
266 EvicteCacheLine(addr, cur_set, i);
267 if (arg->verbose) {
268 cache_summary.hits++;
269 printf("%c %lld,%d miss eviction hit\n", operation_table[trace[i].op],
270 trace[i].address, trace[i].size);
271 }
272 }
273 }
274}
275int main(int argc, char **argv) {
276 Arg arg;
277 ParseArgs(argc, argv, &arg);
278 ParseTraceFile(arg.tracefile);
279 int total_set_num = 1 << arg.set_index_bits;
280 // printf("total_set_num:%d\n",total_set_num);
281 set = (Set *)malloc(total_set_num * sizeof(Set));
282 memset(set, 0, total_set_num * sizeof(Set));
283 for (int i = 0; i < total_set_num; i++) {
284 set[i].lines = (Line *)malloc(arg.lines_per_set * sizeof(Line));
285 memset(set[i].lines, 0, arg.lines_per_set * sizeof(Line));
286 set[i].num_of_lines = arg.lines_per_set;
287 }
288 SimulateCache(&arg);
289 printSummary(cache_summary.hits, cache_summary.misses,
290 cache_summary.evitions);
291
292 for (int i = 0; i < total_set_num; i++) {
293 free(set[i].lines);
294 }
295 free(set);
296
297 return 0;
298}
299
300
PART B
三个小题。。矩阵的size分别是:
- 32*32
- 64*64
- 61*67
很重要的一点是。。输入就是这三组固定值。。。题目是允许根据不同的输入。。来用不同的实现应对的。
32*32
1
2int tmp, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
3 for (int i = 0; i < N; i += 8) {
4 for (int j = 0; j < M; j++) {
5 tmp = A[i][j];
6 tmp1 = A[i + 1][j];
7 tmp2 = A[i + 2][j];
8 tmp3 = A[i + 3][j];
9 tmp4 = A[i + 4][j];
10 tmp5 = A[i + 5][j];
11 tmp6 = A[i + 6][j];
12 tmp7 = A[i + 7][j];
13 B[j][i] = tmp;
14 B[j][i + 1] = tmp1;
15 B[j][i + 2] = tmp2;
16 B[j][i + 3] = tmp3;
17 B[j][i + 4] = tmp4;
18 B[j][i + 5] = tmp5;
19 B[j][i + 6] = tmp6;
20 B[j][i + 7] = tmp7;
21 }
22 }
23
61*67
1
2 int tmp[8];
3 for (int block_i_start = 0; block_i_start < N; block_i_start += 8) {
4 for (int block_j_start = 0; block_j_start < M; block_j_start += 8) {
5 for (int i = block_i_start; i < min(block_i_start + 8, N); i++) {
6 for (int j = block_j_start; j < min(M, block_j_start + 8); j++) {
7 tmp[j - block_j_start] = A[i][j];
8 }
9 for (int j = block_j_start; j < min(M, block_j_start + 8); j++) {
10 B[j][i] = tmp[j - block_j_start];
11 }
12
13 }
14 }
15 }
64*64
略难。。留个坑。。日后填
最终分数
64*64的没拿到分数。。其他部分是满分
1
2Cache Lab summary:
3 Points Max pts Misses
4Csim correctness 27.0 27
5Trans perf 32x32 8.0 8 287
6Trans perf 64x64 0.0 8 4723
7Trans perf 61x67 10.0 10 1997
8 Total points 45.0 53
9
Posts in this Series
- [施工完成] CSAPP shell lab
- [施工完成] CSAPP Malloc lab
- [施工完成] CSAPP Cachelab
- 【施工完成】CSAPP archlab
- 【施工完成】CSAPP attacklab
- 【施工完成】CSAPP bomb lab
- 【施工完成】CSAPP data lab