Skip to main content
  1. Posts/
  2. Today I Learned/

CSAPP Architecture Lab

·2974 words·14 mins
Jiho Kim
Author
Jiho Kim
달려 또 달려

📝 상세 정리
#

Part A
#

sum.ys
#

  • Y86_64 문법으로 연결리스트의 원소들을 더하는 C 코드를 바꾸자.
# sum.ys - iteratively sum the elements of a linked list.
# Execution begins at address 0.
.pos 0
    irmovq stack, %rsp      # set up the stack pointer
    call main
    halt

# Sample three-element linked list; each node is { value, next }.
.align 8
ele1:
    .quad 0x00a
    .quad ele2
ele2:
    .quad 0x0b0
    .quad ele3
ele3:
    .quad 0xc00
    .quad 0                 # null next pointer terminates the list

# main: calls sum_list(ele1); the result (0xcba for the list above)
# is left in %rax.
main:
    irmovq ele1, %rdi
    call sum_list
    ret

# long sum_list(list_ptr ls)
#   In:       %rdi = ls (may be null)
#   Out:      %rax = sum of the node values
#   Clobbers: %rdi, %r10, condition codes
# The node value is held in caller-saved %r10 so that callee-saved %rbx
# is never overwritten without being preserved.
sum_list:
    xorq %rax, %rax         # sum = 0
sum_list_while:
    andq %rdi, %rdi         # ls == NULL?
    je sum_list_while_end
    mrmovq (%rdi), %r10     # %r10 = ls->val
    addq %r10, %rax         # sum += ls->val
    mrmovq 8(%rdi), %rdi    # ls = ls->next
    jmp sum_list_while
sum_list_while_end:
    ret

# The stack starts here and grows toward lower addresses.
.pos 0x200
stack:

rsum.ys
#

  • sum.ys와 같지만, 재귀함수로 구성하자.
# rsum.ys - recursively sum the elements of a linked list.
# Execution begins at address 0.
.pos 0
    irmovq stack, %rsp      # set up the stack pointer
    call main
    halt

# Sample three-element linked list; each node is { value, next }.
.align 8
ele1:
    .quad 0x00a
    .quad ele2
ele2:
    .quad 0x0b0
    .quad ele3
ele3:
    .quad 0xc00
    .quad 0                 # null next pointer terminates the list

# main: calls rsum_list(ele1); the result is left in %rax.
main:
    irmovq ele1, %rdi
    call rsum_list
    ret

# long rsum_list(list_ptr ls)
#   In:  %rdi = ls (may be null)
#   Out: %rax = sum of the node values
# %rbx carries this node's value across the recursive call, so it is
# pushed on entry to the recursive path and popped before returning.
rsum_list:
    andq %rdi, %rdi         # ls == NULL?
    je rsum_list_base
    pushq %rbx              # preserve the caller's %rbx
    mrmovq (%rdi), %rbx     # %rbx = ls->val
    mrmovq 8(%rdi), %rdi    # ls = ls->next
    call rsum_list          # %rax = rsum_list(ls->next)
    addq %rbx, %rax         # add this node's value
    popq %rbx
    ret
rsum_list_base:
    irmovq $0, %rax         # an empty list sums to 0
    ret

# The stack starts here and grows toward lower addresses.
.pos 0x200
stack:

copy.ys
#

  • 이번엔 source 블럭에서 destination 블럭으로 값을 복사하는걸 수행하자
# copy.ys - copy a block of words and return the XOR of the copied values.
# Execution begins at address 0.
.pos 0
    irmovq stack, %rsp      # set up the stack pointer
    call main
    halt

.align 8
# Source block
src:
    .quad 0x00a
    .quad 0x0b0
    .quad 0xc00
# Destination block
dest:
    .quad 0x111
    .quad 0x222
    .quad 0x333

# main: calls copy_block(src, dest, 3); the checksum (0xcba for the data
# above) is left in %rax.
main:
    irmovq src, %rdi
    irmovq dest, %rsi
    irmovq $3, %rdx
    call copy_block
    ret

# long copy_block(long *src, long *dest, long len)
#   In:       %rdi = src, %rsi = dest, %rdx = len
#   Out:      %rax = XOR of every word copied
#   Clobbers: %rdi, %rsi, %rdx, %r8, %r9, %r10, condition codes
# Only caller-saved registers are used as scratch, so callee-saved %rbx
# is left alone. The constants are loaded once, outside the loop.
copy_block:
    irmovq $8, %r8          # word size, used to advance both pointers
    irmovq $1, %r9          # loop decrement
    xorq %rax, %rax         # result = 0
    andq %rdx, %rdx         # len <= 0?
    jle copy_block_end      # nothing to copy for zero or negative len
copy_block_loop:
    mrmovq (%rdi), %r10     # val = *src
    addq %r8, %rdi          # src++
    rmmovq %r10, (%rsi)     # *dest = val
    addq %r8, %rsi          # dest++
    xorq %r10, %rax         # result ^= val
    subq %r9, %rdx          # len--  (sets the flags tested below)
    jg copy_block_loop      # keep going while len > 0
copy_block_end:
    ret

# The stack starts here and grows toward lower addresses.
.pos 0x200
stack:

Part B
#

  • SEQ 프로세서 (seq-full.hcl)에 iaddq 명령어를 추가하자.
  • Fetch / Decode / Execute / Memory / Write-back를 잘 처리해야겠네…
    • 천천히 해보자

Fetch
#

################ Fetch Stage     ###################################
# NOTE: this listing is the unmodified stage. According to the notes that
# follow it, supporting iaddq means adding IIADDQ to instr_valid,
# need_regids and need_valC; icode and ifun are left as they are.

# Determine instruction code
word icode = [
	imem_error: INOP;
	1: imem_icode;		# Default: get from instruction memory
];

# Determine instruction function
word ifun = [
	imem_error: FNONE;
	1: imem_ifun;		# Default: get from instruction memory
];

# Is the fetched icode one of the listed instruction codes?
# (IIADDQ is not in this set yet.)
bool instr_valid = icode in 
	{ INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
	       IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ };

# Does fetched instruction require a regid byte?
# (iaddq adds to a register, so IIADDQ belongs here as well.)
bool need_regids =
	icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ, 
		     IIRMOVQ, IRMMOVQ, IMRMOVQ };

# Does fetched instruction require a constant word?
# (iaddq carries an immediate, so IIADDQ belongs here as well.)
bool need_valC =
	icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL };
  • iaddq에는 뭐가 필요할까?
  • 일단 icode, ifun 은 물론 잘 있다고 생각하고, 아래를 생각해보자.
  • instr_valid를 true로 만들기 위해 저기에다가 IIADDQ를 추가하고,
  • 레지스터가 필요한가? 아무래도 레지스터에 더해야한다.
  • val_C도 필요한가? 아무래도 상수값을 더하려면 필요하다. 다 추가해주자.

Decode
#

################ Decode Stage    ###################################
# NOTE: this listing is the unmodified stage. According to the notes that
# follow it, iaddq needs IIADDQ in srcB (the register being added to) and
# in the unconditional rB case of dstE; srcA and dstM stay unchanged.

## What register should be used as the A source?
word srcA = [
	icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ  } : rA;
	icode in { IPOPQ, IRET } : RRSP;
	1 : RNONE; # Don't need register
];

## What register should be used as the B source?
word srcB = [
	icode in { IOPQ, IRMMOVQ, IMRMOVQ  } : rB;	# IIADDQ goes in this set
	icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
	1 : RNONE;  # Don't need register
];

## What register should be used as the E destination?
word dstE = [
	icode in { IRRMOVQ } && Cnd : rB;	# written only when Cnd holds
	icode in { IIRMOVQ, IOPQ} : rB;		# always written; IIADDQ goes in this set
	icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
	1 : RNONE;  # Don't write any register
];

## What register should be used as the M destination?
word dstM = [
	icode in { IMRMOVQ, IPOPQ } : rA;
	1 : RNONE;  # Don't write any register
];
  • srcA, srcB, dstE, dstM중에선 뭐가 필요할까?
  • 아무래도 가져올필요 없으니 srcA, srcB는 필요 없는거같고, dst만 있으면 될것같다.
    • 아니지, srcB에다가 더해야하는거니까 srcB도 가져와야한다.
  • 메모리에서 긁어온게 아니라 Execute에서 얻을 수 있는 값이니, dstE에 추가하면 되겠다.
    • 근데 Cnd는 뭐지? condition중에 하나인거같은데, 일단 무조건 rB는 되어야하니 두번째줄에 넣자.

Execute
#

################ Execute Stage   ###################################
# NOTE: this listing is the unmodified stage. According to the notes that
# follow it, iaddq needs IIADDQ in the valC case of aluA, in the valB case
# of aluB and in set_cc; alufun is left as it is.

## Select input A to ALU
word aluA = [
	icode in { IRRMOVQ, IOPQ } : valA;
	icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ } : valC;	# IIADDQ goes in this set
	icode in { ICALL, IPUSHQ } : -8;
	icode in { IRET, IPOPQ } : 8;
	# Other instructions don't need ALU
];

## Select input B to ALU
word aluB = [
	icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, 
		      IPUSHQ, IRET, IPOPQ } : valB;	# IIADDQ goes in this set
	icode in { IRRMOVQ, IIRMOVQ } : 0;
	# Other instructions don't need ALU
];

## Set the ALU function
word alufun = [
	icode == IOPQ : ifun;
	1 : ALUADD;	# every other instruction uses ALUADD
];

## Should the condition codes be updated?
## (Only IOPQ here; the notes add IIADDQ.)
bool set_cc = icode in { IOPQ };
  • 계산이랑 조건비트 설정까지 다 해야하는 Execute 단계이다.
  • aluA에 valC를, aluB에 valB를 넣어야겠지?
  • alufun은 아니고, setcc는 켜야겠다.

Memory
#

  • 여기선 뭐 할게 없죠 메모리에 접근을 안하는디

Write back
#

  • ์—? ์›๋ณธ ์ฝ”๋“œ์—๋Š” ์—ฌ๊ธฐ๊ฐ€ ์—†๋‹ค. ์•Œ์•„์„œ ๋˜๋Š” ์˜์—ญ์ธ๋“ฏ?

Program Counter Update
#

  • 이것도 자연스럽게 처리되어있다.

Test
#

  • 위의것들을 추가하면 간단하게 완료된다! 생각보다 쉽네
Y86-64 Processor: seq-full.hcl
137 bytes of code read
IF: Fetched irmovq at 0x0.  ra=----, rb=%rsp, valC = 0x100
IF: Fetched call at 0xa.  ra=----, rb=----, valC = 0x38
Wrote 0x13 to address 0xf8
IF: Fetched irmovq at 0x38.  ra=----, rb=%rdi, valC = 0x18
IF: Fetched irmovq at 0x42.  ra=----, rb=%rsi, valC = 0x4
IF: Fetched call at 0x4c.  ra=----, rb=----, valC = 0x56
Wrote 0x55 to address 0xf0
IF: Fetched xorq at 0x56.  ra=%rax, rb=%rax, valC = 0x0
IF: Fetched andq at 0x58.  ra=%rsi, rb=%rsi, valC = 0x0
IF: Fetched jmp at 0x5a.  ra=----, rb=----, valC = 0x83
IF: Fetched jne at 0x83.  ra=----, rb=----, valC = 0x63
IF: Fetched mrmovq at 0x63.  ra=%r10, rb=%rdi, valC = 0x0
IF: Fetched addq at 0x6d.  ra=%r10, rb=%rax, valC = 0x0
IF: Fetched iaddq at 0x6f.  ra=----, rb=%rdi, valC = 0x8
IF: Fetched iaddq at 0x79.  ra=----, rb=%rsi, valC = 0xffffffffffffffff
IF: Fetched jne at 0x83.  ra=----, rb=----, valC = 0x63
IF: Fetched mrmovq at 0x63.  ra=%r10, rb=%rdi, valC = 0x0
IF: Fetched addq at 0x6d.  ra=%r10, rb=%rax, valC = 0x0
IF: Fetched iaddq at 0x6f.  ra=----, rb=%rdi, valC = 0x8
IF: Fetched iaddq at 0x79.  ra=----, rb=%rsi, valC = 0xffffffffffffffff
IF: Fetched jne at 0x83.  ra=----, rb=----, valC = 0x63
IF: Fetched mrmovq at 0x63.  ra=%r10, rb=%rdi, valC = 0x0
IF: Fetched addq at 0x6d.  ra=%r10, rb=%rax, valC = 0x0
IF: Fetched iaddq at 0x6f.  ra=----, rb=%rdi, valC = 0x8
IF: Fetched iaddq at 0x79.  ra=----, rb=%rsi, valC = 0xffffffffffffffff
IF: Fetched jne at 0x83.  ra=----, rb=----, valC = 0x63
IF: Fetched mrmovq at 0x63.  ra=%r10, rb=%rdi, valC = 0x0
IF: Fetched addq at 0x6d.  ra=%r10, rb=%rax, valC = 0x0
IF: Fetched iaddq at 0x6f.  ra=----, rb=%rdi, valC = 0x8
IF: Fetched iaddq at 0x79.  ra=----, rb=%rsi, valC = 0xffffffffffffffff
IF: Fetched jne at 0x83.  ra=----, rb=----, valC = 0x63
IF: Fetched ret at 0x8c.  ra=----, rb=----, valC = 0x0
IF: Fetched ret at 0x55.  ra=----, rb=----, valC = 0x0
IF: Fetched halt at 0x13.  ra=----, rb=----, valC = 0x0
32 instructions executed
Status = HLT
Condition Codes: Z=1 S=0 O=0
Changed Register State:
%rax:   0x0000000000000000      0x0000abcdabcdabcd
%rsp:   0x0000000000000000      0x0000000000000100
%rdi:   0x0000000000000000      0x0000000000000038
%r10:   0x0000000000000000      0x0000a000a000a000
Changed Memory State:
0x00f0: 0x0000000000000000      0x0000000000000055
0x00f8: 0x0000000000000000      0x0000000000000013
ISA Check Succeeds

./optest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 58 ISA Checks Succeed
./jtest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 96 ISA Checks Succeed
./ctest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 22 ISA Checks Succeed
./htest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 756 ISA Checks Succeed

Part C
#

  • ncopy.ys가 최대한 빠르게 실행되도록 최적화하는 단계이다.
  • CPE가 7.5 이하로 가도록!
  • pipe-full.hcl를 수정하고, ncopy.ys를 수정하자.
make VERSION=full GUIMODE="" # after editing pipe-full.hcl
make drivers         # after editing ncopy.ys
./correctness.pl     # check correctness
./benchmark.pl       # check CPE
  • 처음 CPE는 15.18이다. 힘내보자고

pipe-full.hcl
#

  • 일단 CPU부터 손보자.
    • 여기서도 IIADDQ를 추가해야하는것 같다.
Fetch
#
  • 일단 f_PC는 상관 없을 것 같다.
  • f_icode, f_ifun도 문제 없고..
  • instr_valid는 추가해야겠지.
  • 위와 같이 need_regids, need_valC도 다 추가해야겠다.
  • f_predPC는 점프할일 없으니까 f_valP 그대로!
Decode
#
  • 똑같이 d_srcB, d_dstE 두개를 남겨야할 것 같다.
  • valA를 가져올때 이슈인데… 포워딩을 해야되는데.
  • valA에는 Execute 단계에서 valC를 가져오면 되고, valB는 땡겨올텐데… 아 이미 코딩이 되어있다.
    • e_dstE, M_dstM, M_dstE, W_dstM, W_dstE 순서에서 땡겨온다. 안건드려도 될듯
Execute
#
  • aluA는 E_valC에서 데려오는거니, 거기다가 IIADDQ를 넣어주자.
  • aluB에는 E_valB에서 가져오게.
  • alufun은 상관없고, setcc가 좀 이슈인데…
## Should the condition codes be updated?
## (Original definition: only IOPQ qualifies. The notes propose replacing
## the equality test with a set membership test that also lists IIADDQ.)
bool set_cc = E_icode == IOPQ &&
	# State changes only during normal operation
	!m_stat in { SADR, SINS, SHLT } && !W_stat in { SADR, SINS, SHLT };
  • 원래 코드가 이렇게 되어있는데, E_icode == IOPQ 부분을 in 문법으로 바꾸면 될까?
bool set_cc = E_icode in { IOPQ, IIADDQ} &&
  • 이렇게 한번 바꿔보자.
Memory
#
  • 할일 없지 않을까?
Pipeline Register control
#
  • 버블을 넣을까 말까인데, IADDQ는 뭐 상관 없었지.
  • stall도 할 필요 없고.
test
#
โฏ cd ../y86-code; make testpsim
Makefile:42: warning: ignoring prerequisites on suffix rule definition
Makefile:45: warning: ignoring prerequisites on suffix rule definition
Makefile:48: warning: ignoring prerequisites on suffix rule definition
Makefile:51: warning: ignoring prerequisites on suffix rule definition
../pipe/psim -t asum.yo > asum.pipe
../pipe/psim -t asumr.yo > asumr.pipe
../pipe/psim -t cjr.yo > cjr.pipe
../pipe/psim -t j-cc.yo > j-cc.pipe
../pipe/psim -t poptest.yo > poptest.pipe
../pipe/psim -t pushquestion.yo > pushquestion.pipe
../pipe/psim -t pushtest.yo > pushtest.pipe
../pipe/psim -t prog1.yo > prog1.pipe
../pipe/psim -t prog2.yo > prog2.pipe
../pipe/psim -t prog3.yo > prog3.pipe
../pipe/psim -t prog4.yo > prog4.pipe
../pipe/psim -t prog5.yo > prog5.pipe
../pipe/psim -t prog6.yo > prog6.pipe
../pipe/psim -t prog7.yo > prog7.pipe
../pipe/psim -t prog8.yo > prog8.pipe
../pipe/psim -t ret-hazard.yo > ret-hazard.pipe
grep "ISA Check" *.pipe
asum.pipe:ISA Check Succeeds
asumr.pipe:ISA Check Succeeds
cjr.pipe:ISA Check Succeeds
j-cc.pipe:ISA Check Succeeds
poptest.pipe:ISA Check Succeeds
prog1.pipe:ISA Check Succeeds
prog2.pipe:ISA Check Succeeds
prog3.pipe:ISA Check Succeeds
prog4.pipe:ISA Check Succeeds
prog5.pipe:ISA Check Succeeds
prog6.pipe:ISA Check Succeeds
prog7.pipe:ISA Check Succeeds
prog8.pipe:ISA Check Succeeds
pushquestion.pipe:ISA Check Succeeds
pushtest.pipe:ISA Check Succeeds
ret-hazard.pipe:ISA Check Succeeds
rm asum.pipe asumr.pipe cjr.pipe j-cc.pipe poptest.pipe pushquestion.pipe pushtest.pipe prog1.pipe prog2.pipe prog3.pipe prog4.pipe prog5.pipe prog6.pipe prog7.pipe prog8.pipe ret-hazard.pipe
โฏ cd ../ptest; make SIM=../pipe/psim TFLAGS=-i
./optest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
  All 58 ISA Checks Succeed
./jtest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
  All 96 ISA Checks Succeed
./ctest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
  All 22 ISA Checks Succeed
./htest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
  All 756 ISA Checks Succeed
  • 구웃 잘 돌아간다

ncopy.ys
#

  • 일단 아무래도 CPE는 그대로 15.18이다. 이걸 잘 바꿔보자.
# You can modify this portion
	# Baseline loop: copies one word per iteration and counts the
	# positive ones. Register roles, as given by the comments below:
	#   %rdi = src, %rsi = dst, %rdx = len, %rax = count, %r10 = scratch
	# Done is defined outside this excerpt.
	# Loop header
	xorq %rax,%rax		# count = 0;
	andq %rdx,%rdx		# len <= 0?
	jle Done		# if so, goto Done:

Loop:	mrmovq (%rdi), %r10	# read val from src...
	rmmovq %r10, (%rsi)	# ...and store it to dst
	andq %r10, %r10		# val <= 0?
	jle Npos		# if so, goto Npos:
	irmovq $1, %r10		# constant 1 (val is no longer needed)
	addq %r10, %rax		# count++
Npos:	irmovq $1, %r10		# constant 1 for the decrement
	subq %r10, %rdx		# len--
	irmovq $8, %r10		# constant 8 = one word
	addq %r10, %rdi		# src++
	addq %r10, %rsi		# dst++
	andq %rdx,%rdx		# len > 0?
	jg Loop			# if so, goto Loop:
  • 우리가 바꿀 수 있는건 이부분인데..
IIADDQ
#
  • irmovq 를 이용해서 레지스터에 값을 넣고, 그걸 addq로 연산하는 부분이 두군데 보인다.
    • 이걸 iaddq로 줄일 수 있겠다.
  • 이걸 수행했더니 CPE가 15.18 -> 13.70으로 줄었다. 갈길이 멀어보인다..
  • 어라, Npos에도 $-1로 len--를 구현할 수 있었다.
    • 딱 하나 줄어서 12.70이 됐다.
순서 바꾸기?
#
  • mmmovq가 없으니, mrmovq, rmmovq로 데이터를 복사하고 있다.
  • 그런데 이때 메모리 참조에서 하자드가 일어나서, bubble / stall이 들어가지 않았던가?
  • 이 사이에 andq를 넣어볼까?
  • 아 이것만으로는 같다… 안되넹
# Reordering attempt: the test is moved between the load and the store.
Loop:	mrmovq (%rdi), %r10	# read val from src...
	andq %r10, %r10		# val <= 0? (still reads %r10 straight after the load)
	rmmovq %r10, (%rsi)	# ...and store it to dst (assumed to leave the flags from andq intact)
	jle Npos		# if so, goto Npos:
	iaddq $1, %rax		# count++
  • 아하, 안되는게 아니라 이렇게 수정한거였는데, andq에서도 결국 %r10이 필요해서 이슈가 생긴다.
  • 그런데 사이에 더 끼울수있는게 없는데..
어셈블리 최적화
#
  • 코드 자체를 최적화해볼까?
  • 점프같은걸 잘 없앨 수 있을거같은데. 점프 적중률을 올려보자.
  • Y86-64는 언제나 점프를 안한다고 생각하고 움직인다. (주의: CS:APP의 PIPE 구현은 f_predPC에서 IJXX를 f_valC로, 즉 조건부 점프를 항상 taken으로 예측한다.)
# Branch-layout experiment: the count++ is moved out of line (plus1) so
# that the non-positive case falls straight through to Npos.
# %rdi = src, %rsi = dst, %rdx = len, %rax = count.
# Done is defined outside this excerpt.
ncopy:
	xorq %rax,%rax		# count = 0;
	andq %rdx,%rdx		# len <= 0?
	jle Done		# if so, goto Done:

Loop:
	mrmovq (%rdi), %r10	# read val from src...
	andq %r10, %r10		# val <= 0?
	rmmovq %r10, (%rsi)	# ...and store it to dst
	# Previous form, kept for reference:
	# jle Npos		# if so, goto Npos:
	# iaddq $1, %rax		# count++
	jg plus1		# val > 0: count it out of line
Npos:
	iaddq $-1, %rdx		# len--
	iaddq $8, %rdi		# src++
	iaddq $8, %rsi		# dst++
	andq %rdx,%rdx		# len > 0?
	jle Done		# no elements left
	jmp Loop			# otherwise, goto Loop:
plus1:
	iaddq $1, %rax		# count++
	jmp Npos		# rejoin the main path
  • 쩝 이런느낌으로 해볼까 했는데, 오히려 명령어가 늘어서 15를 넘겨버렸다. 어떻게 하면 좋지? 점프를 덜타야할거같은데
버킷질
#
  • 제곱근분할법에서 하는것처럼, 버킷질로 묶어서 처리할 수 있지 않을까?
  • 그렇게하면 mrmovq 해저드도 없앨 수 있지 않을까?
# Two elements per iteration. Both loads are issued before either value
# is used. NOTE: every pass handles exactly two elements, so this form is
# only correct when len is even.
Loop:
	mrmovq (%rdi), %r10	# read src[0]...
	mrmovq 8(%rdi), %r11 # ...and src[1]
	rmmovq %r10, (%rsi)	# dst[0] = src[0]
	rmmovq %r11, 8(%rsi) # dst[1] = src[1]
	andq %r10, %r10		# src[0] <= 0?
	jle Npos		# if so, goto Npos:
	iaddq $1, %rax		# count++
Npos:
	andq %r11, %r11		# src[1] <= 0?
	jle Npos2		# if so, goto Npos2:
	iaddq $1, %rax		# count++
Npos2:
	iaddq $-2, %rdx		# len -= 2
	iaddq $16, %rdi		# src += 2
	iaddq $16, %rsi		# dst += 2
	andq %rdx,%rdx		# len > 0?
	jg Loop			# if so, goto Loop:
  • 아 다 좋은데.. 이게 짝수개일때만 동작한다. 홀수개이면 어카지? Loop2를 만들까?
  • Loop로는 len > 1일때만 보내자.
# Two-way unrolled copy with a one-element tail.
# %rdi = src, %rsi = dst, %rax = count.
# After the first iaddq, %rdx holds (elements left - 1), so at Loop:
#   negative -> nothing left, zero -> exactly one left (move1),
#   positive -> at least two left.
# Done is defined outside this excerpt; move1 falls through into it.
ncopy:
	xorq %rax,%rax		# count = 0;
	iaddq $-1, %rdx		# rdx = len - 1
	jl Done			# len <= 0: nothing to copy
Loop:
	je move1		# exactly one element left (flags from the last iaddq on %rdx)
	mrmovq (%rdi), %r10	# read src[0]...
	mrmovq 8(%rdi), %r11 # ...and src[1]
	rmmovq %r10, (%rsi)	# dst[0] = src[0]
	rmmovq %r11, 8(%rsi) # dst[1] = src[1]
	andq %r10, %r10		# src[0] <= 0?
	jle Npos		# if so, goto Npos:
	iaddq $1, %rax		# count++
Npos:
	andq %r11, %r11		# src[1] <= 0?
	jle Npos2		# if so, goto Npos2:
	iaddq $1, %rax		# count++
Npos2:
	iaddq $16, %rdi		# src += 2
	iaddq $16, %rsi		# dst += 2
	iaddq $-2, %rdx		# len -= 2
	jge Loop		# at least one element left
	jmp Done
move1:
	mrmovq (%rdi), %r10	# read the last val from src...
	rmmovq %r10, (%rsi)	# ...and store it to dst
	andq %r10, %r10		# val <= 0?
	jle Done		# if so, goto Done:
	iaddq $1, %rax		# count++
  • 오!! 이렇게하니까 된다!!
  • CPE 10.08까지 줄였다.
  • 배치가 0~64개까지 있으니까, 대충 8개정도가 제곱근 분할법상 맞는거같지만..
  • %r10부터 시작하니, %r10~r15까지 6개를 써서 해볼까? 하아 코드가 개길어진다
# Six-way unrolled copy with a linear dispatch ("router") for the 0..5
# leftover elements.
# %rdi = src, %rsi = dst, %rdx = len, %rax = count;
# %r9-%r14 hold the values loaded in one pass.
# Done is defined outside this excerpt; the last check falls through into it.
ncopy:
	xorq %rax,%rax		# count = 0;
isbig:
	iaddq $-6, %rdx		# at least 6 elements?
	jge Loop
	iaddq $6, %rdx		# no: restore rdx to the number of elements left
router: # check remainder 0 to 5
	iaddq $-1, %rdx		# rdx = left - 1
	jl Done			# left == 0
	je move1		# left == 1
	iaddq $-2, %rdx		# rdx = left - 3
	jl move2		# left == 2
	je move3		# left == 3
	iaddq $-2, %rdx		# rdx = left - 5
	jl move4		# left == 4
	je move5		# left == 5 (router is only entered with fewer than 6 left)
Loop:
	# Load all six values before any of them is used.
	mrmovq (%rdi), %r10
	mrmovq 8(%rdi), %r11
	mrmovq 16(%rdi), %r12
	mrmovq 24(%rdi), %r13
	mrmovq 32(%rdi), %r14
	mrmovq 40(%rdi), %r9
	# Store all six values.
	rmmovq %r10, (%rsi)
	rmmovq %r11, 8(%rsi)
	rmmovq %r12, 16(%rsi)
	rmmovq %r13, 24(%rsi)
	rmmovq %r14, 32(%rsi)
	rmmovq %r9, 40(%rsi)
	# Count the positive values, one test per element.
Npos_loop1:
	andq %r10, %r10
	jle Npos_loop2
	iaddq $1, %rax
Npos_loop2:
	andq %r11, %r11
	jle Npos_loop3
	iaddq $1, %rax
Npos_loop3:
	andq %r12, %r12
	jle Npos_loop4
	iaddq $1, %rax
Npos_loop4:
	andq %r13, %r13
	jle Npos_loop5
	iaddq $1, %rax
Npos_loop5:
	andq %r14, %r14
	jle Npos_loop6
	iaddq $1, %rax
Npos_loop6:
	andq %r9, %r9
	jle Loop_end
	iaddq $1, %rax
Loop_end:
	iaddq $48, %rdi		# src += 6
	iaddq $48, %rsi		# dst += 6
	iaddq $-6, %rdx		# another full group of 6?
	jge Loop
	iaddq $6, %rdx		# no: restore the leftover count
	jmp router
	# Tails: moveK copies the K leftover elements, then enters the
	# move_check chain at the point that tests exactly those K values.
move1:
	mrmovq (%rdi), %r10
	rmmovq %r10, (%rsi)
	jmp move_check10
move2:
	mrmovq (%rdi), %r10
	mrmovq 8(%rdi), %r11
	rmmovq %r10, (%rsi)
	rmmovq %r11, 8(%rsi)
	jmp move_check11
move3:
	mrmovq (%rdi), %r10
	mrmovq 8(%rdi), %r11
	mrmovq 16(%rdi), %r12
	rmmovq %r10, (%rsi)
	rmmovq %r11, 8(%rsi)
	rmmovq %r12, 16(%rsi)
	jmp move_check12
move4:
	mrmovq (%rdi), %r10
	mrmovq 8(%rdi), %r11
	mrmovq 16(%rdi), %r12
	mrmovq 24(%rdi), %r13
	rmmovq %r10, (%rsi)
	rmmovq %r11, 8(%rsi)
	rmmovq %r12, 16(%rsi)
	rmmovq %r13, 24(%rsi)
	jmp move_check13
move5:
	mrmovq (%rdi), %r10
	mrmovq 8(%rdi), %r11
	mrmovq 16(%rdi), %r12
	mrmovq 24(%rdi), %r13
	mrmovq 32(%rdi), %r14
	rmmovq %r10, (%rsi)
	rmmovq %r11, 8(%rsi)
	rmmovq %r12, 16(%rsi)
	rmmovq %r13, 24(%rsi)
	rmmovq %r14, 32(%rsi)
	# Checks run from the highest loaded register down to %r10.
move_check14:
	andq %r14, %r14
	jle move_check13
	iaddq $1, %rax
move_check13:
	andq %r13, %r13
	jle move_check12
	iaddq $1, %rax
move_check12:
	andq %r12, %r12
	jle move_check11
	iaddq $1, %rax
move_check11:
	andq %r11, %r11
	jle move_check10
	iaddq $1, %rax
move_check10:
	andq %r10, %r10
	jle Done
	iaddq $1, %rax
  • 열심히 했더니 8.01까지 줄었다!!
이분 탐색
#
# Dispatch on the leftover count by splitting around 3 instead of testing
# the values in increasing order.
router_tree: # on entry 0 <= rdx <= 5 (elements left)
	iaddq $-3, %rdx		# rdx = left - 3
	je move3		# left == 3
	jg router_tree_R	# left > 3
router_tree_L: # left < 3
	iaddq $2, %rdx		# rdx = left - 1
	jl Done			# left == 0
	je move1		# left == 1
	jg move2		# left == 2 (the only remaining case)
router_tree_R: # left > 3
	iaddq $-1, %rdx		# rdx = left - 4
	je move4		# left == 4
	jg move5		# left == 5 (the only remaining case)
  • 라우터를 이분탐색으로 해봤고, 7.80까지 줄일 수 있었다.
  • move 로직도 좀 수정했다.
##################################################################
# You can modify this portion
	# Final version: six-way unrolled loop, tree-shaped dispatch for the
	# leftover elements, and each store placed between the test of a
	# value and the branch on that test.
	# %rdi = src, %rsi = dst, %rdx = len, %rax = count;
	# %r9-%r14 hold the values loaded in one pass.
	# Done is defined outside this excerpt; the last check falls
	# through into it.
	# Loop header
	xorq %rax,%rax		# count = 0;
isbig:
	iaddq $-6, %rdx		# at least 6 elements?
	jge Loop
router:
	# Here rdx = (elements left) - 6; it is not restored first.
	iaddq $3, %rdx		# rdx = left - 3
	jl router_L		# left < 3
	je move3		# left == 3
	jg router_R		# left > 3
router_L:
	iaddq $2, %rdx		# rdx = left - 1
	jl Done			# left == 0
	je move1		# left == 1
	jmp move2		# left == 2
router_R:
	iaddq $-1, %rdx		# rdx = left - 4
	je move4		# left == 4
	jmp move5		# left == 5
Loop:
	# Load all six values before any of them is used.
	mrmovq (%rdi), %r10
	mrmovq 8(%rdi), %r11
	mrmovq 16(%rdi), %r12
	mrmovq 24(%rdi), %r13
	mrmovq 32(%rdi), %r14
	mrmovq 40(%rdi), %r9
	# Per element: test, store, then branch on the test.
Npos_loop1:
	andq %r10, %r10
	rmmovq %r10, (%rsi)
	jle Npos_loop2
	iaddq $1, %rax
Npos_loop2:
	andq %r11, %r11
	rmmovq %r11, 8(%rsi)
	jle Npos_loop3
	iaddq $1, %rax
Npos_loop3:
	andq %r12, %r12
	rmmovq %r12, 16(%rsi)
	jle Npos_loop4
	iaddq $1, %rax
Npos_loop4:
	andq %r13, %r13
	rmmovq %r13, 24(%rsi)
	jle Npos_loop5
	iaddq $1, %rax
Npos_loop5:
	andq %r14, %r14
	rmmovq %r14, 32(%rsi)
	jle Npos_loop6
	iaddq $1, %rax
Npos_loop6:
	andq %r9, %r9
	rmmovq %r9, 40(%rsi)
	jle Loop_end
	iaddq $1, %rax
Loop_end:
	iaddq $48, %rdi		# src += 6
	iaddq $48, %rsi		# dst += 6
	iaddq $-6, %rdx		# another full group of 6?
	jge Loop
	jmp router
	# Tails: moveK only loads the K leftover values; the move_check
	# chain it enters does the stores and the counting.
move1:
	mrmovq (%rdi), %r10
	jmp move_check10
move2:
	mrmovq (%rdi), %r10
	mrmovq 8(%rdi), %r11
	jmp move_check11
move3:
	mrmovq (%rdi), %r10
	mrmovq 8(%rdi), %r11
	mrmovq 16(%rdi), %r12
	jmp move_check12
move4:
	mrmovq (%rdi), %r10
	mrmovq 8(%rdi), %r11
	mrmovq 16(%rdi), %r12
	mrmovq 24(%rdi), %r13
	jmp move_check13
move5:
	mrmovq (%rdi), %r10
	mrmovq 8(%rdi), %r11
	mrmovq 16(%rdi), %r12
	mrmovq 24(%rdi), %r13
	mrmovq 32(%rdi), %r14
	# Checks run from the highest loaded register down to %r10.
move_check14:
	andq %r14, %r14
	rmmovq %r14, 32(%rsi)
	jle move_check13
	iaddq $1, %rax
move_check13:
	andq %r13, %r13
	rmmovq %r13, 24(%rsi)
	jle move_check12
	iaddq $1, %rax
move_check12:
	andq %r12, %r12
	rmmovq %r12, 16(%rsi)
	jle move_check11
	iaddq $1, %rax
move_check11:
	andq %r11, %r11
	rmmovq %r11, 8(%rsi)
	jle move_check10
	iaddq $1, %rax
move_check10:
	andq %r10, %r10
	rmmovq %r10, (%rsi)
	jle Done
	iaddq $1, %rax
  • 7.73정도까지 줄였는데.. 진짜 더는 못하겠다. 멀 해도 안줄어든다 ㅠ.ㅠ 여기서 포기

โ”์งˆ๋ฌธ ์‚ฌํ•ญ
#

🔗 참고 자료
#