๐ ์์ธ ์ ๋ฆฌ#
Part A#
sum.ys#
- Y86_64 문법으로 연결리스트의 원소들을 더하는 C 코드를 바꾸자.
# Execution starts at address 0: point %rsp at the `stack` label,
# run main, then stop the processor.
.pos 0
irmovq stack, %rsp
call main
halt
# Sample linked list with three elements. Each element is two quads:
# the value, followed by the address of the next element (0 ends the list).
.align 8
ele1:
.quad 0x00a
.quad ele2
ele2:
.quad 0x0b0
.quad ele3
ele3:
.quad 0xc00
.quad 0
# main: passes the address of the first list element in %rdi to sum_list
# and returns immediately, leaving sum_list's result in %rax.
main:
irmovq ele1, %rdi
call sum_list
ret
# long sum_list(list_ptr ls) -- iterative sum of a linked list.
# In:       %rdi = address of the first element (0 for an empty list)
# Out:      %rax = sum of the value quads (0 for an empty list)
# Clobbers: %rdi, %r10, condition codes
# Element layout: value at offset 0, next pointer at offset 8.
sum_list:
    xorq   %rax, %rax           # sum = 0
sum_list_while:
    andq   %rdi, %rdi           # ls == 0 ?
    je     sum_list_while_end   # yes: end of list
    mrmovq (%rdi), %r10         # %r10 = ls->val (caller-saved scratch, so the
                                # callee-saved %rbx is left untouched)
    addq   %r10, %rax           # sum += ls->val
    mrmovq 8(%rdi), %rdi        # ls = ls->next
    jmp    sum_list_while
sum_list_while_end:
    ret
.pos 0x200
stack:
rsum.ys#
- sum.ys์ ๊ฐ์ง๋ง, ์ฌ๊ทํจ์๋ก ๊ตฌ์ฑํ์.
# Execution starts at address 0: point %rsp at the `stack` label,
# run main, then stop the processor.
.pos 0
irmovq stack, %rsp
call main
halt
# Sample linked list with three elements. Each element is two quads:
# the value, followed by the address of the next element (0 ends the list).
.align 8
ele1:
.quad 0x00a
.quad ele2
ele2:
.quad 0x0b0
.quad ele3
ele3:
.quad 0xc00
.quad 0
# main: passes the address of the first list element in %rdi to rsum_list
# and returns immediately, leaving rsum_list's result in %rax.
main:
irmovq ele1, %rdi
call rsum_list
ret
# long rsum_list(list_ptr ls) -- recursive sum of a linked list.
# In:       %rdi = address of the current element (0 for an empty list)
# Out:      %rax = value of this element plus the sum of the rest
# Clobbers: %rdi, condition codes; %rbx is saved and restored on the stack.
rsum_list:
    andq   %rdi, %rdi           # ls == 0 ?
    je     rsum_list_empty      # yes: base case
    pushq  %rbx                 # %rbx must keep this element's value across the call
    mrmovq (%rdi), %rbx         # %rbx = ls->val
    mrmovq 8(%rdi), %rdi        # argument for the recursive call: ls->next
    call   rsum_list            # %rax = sum of the remaining elements
    addq   %rbx, %rax           # add this element's value
    popq   %rbx
    ret
rsum_list_empty:
    irmovq $0, %rax             # an empty list sums to 0
    ret
.pos 0x200
stack:
copy.ys#
- 이번엔 source 블럭에서 destination 블럭으로 값을 복사하는걸 수행하자
# Execution starts at address 0: point %rsp at the `stack` label,
# run main, then stop the processor.
.pos 0
irmovq stack, %rsp
call main
halt
.align 8
# Source block: three quads to be copied.
src:
.quad 0x00a
.quad 0x0b0
.quad 0xc00
# Destination block: three quads holding placeholder values before the copy.
dest:
.quad 0x111
.quad 0x222
.quad 0x333
# main: calls copy_block with %rdi = src, %rsi = dest, %rdx = 3 (quads to copy)
# and returns immediately, leaving copy_block's result in %rax.
main:
irmovq src, %rdi
irmovq dest, %rsi
irmovq $3, %rdx
call copy_block
ret
# long copy_block(long *src, long *dest, long len)
# Copies len quads from src to dest and returns the XOR of the copied values.
# In:       %rdi = src, %rsi = dest, %rdx = len (number of quads)
# Out:      %rax = XOR of all copied quads (0 when len <= 0)
# Clobbers: %rdi, %rsi, %rdx, %r8, %r9, %r10, condition codes
# Only caller-saved scratch registers are used, so %rbx is left untouched.
copy_block:
    irmovq $8, %r8              # quad size, loaded once instead of every iteration
    irmovq $1, %r9              # loop decrement
    xorq   %rax, %rax           # result = 0
    andq   %rdx, %rdx           # set condition codes from len
    jle    copy_block_end       # len <= 0: nothing to copy (a negative len must not loop)
copy_block_while:
    mrmovq (%rdi), %r10         # val = *src
    addq   %r8, %rdi            # src++
    rmmovq %r10, (%rsi)         # *dest = val
    addq   %r8, %rsi            # dest++
    xorq   %r10, %rax           # result ^= val
    subq   %r9, %rdx            # len--, sets the condition codes for the branch
    jg     copy_block_while     # keep going while len > 0
copy_block_end:
    ret
.pos 0x200
stack:
Part B#
- SEQ ํ๋ก์ธ์ (seq-full.hcl)์ iaddq ๋ช ๋ น์ด๋ฅผ ์ถ๊ฐํ์.
- Fetch / Decode / Execute / Mmory / Write-back๋ฅผ ์ ์ฒ๋ฆฌํด์ผ๊ฒ ๋ค…
- ์ฒ์ฒํ ํด๋ณด์
Fetch#
################ Fetch Stage ###################################
# Determine instruction code
# (an instruction-memory error is turned into INOP)
word icode = [
imem_error: INOP;
1: imem_icode; # Default: get from instruction memory
];
# Determine instruction function
# (an instruction-memory error is turned into FNONE)
word ifun = [
imem_error: FNONE;
1: imem_ifun; # Default: get from instruction memory
];
# Is the fetched icode one of the listed instruction codes?
# IIADDQ is not in this set as quoted, so it has to be added for iaddq.
bool instr_valid = icode in
{ INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ };
# Does fetched instruction require a regid byte?
# iaddq names a register operand (rB), so IIADDQ has to be added here as well.
bool need_regids =
icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ,
IIRMOVQ, IRMMOVQ, IMRMOVQ };
# Does fetched instruction require a constant word?
bool need_valC =
icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL };- iaddq๋ฅผ ๋ญ๊ฐ ํ์ํ ๊น?
- ์ผ๋จ icode, ifun ์ ๋ฌผ๋ก ์ ์๋ค๊ณ ์๊ฐํ๊ณ , ์๋๋ฅผ ์๊ฐํด๋ณด์.
- instr_valid๋ฅผ true๋ก ๋ง๋ค๊ธฐ ์ํด ์ ๊ธฐ์๋ค๊ฐ IIADDQ๋ฅผ ์ถ๊ฐํ๊ณ ,
- ๋ ์ง์คํฐ๊ฐ ํ์ํ๊ฐ? ์๋ฌด๋๋ ๋ ์ง์คํฐ์ ๋ํด์ผํ๋ค.
- val_C๋ ํ์ํ๊ฐ? ์๋ฌด๋๋ ์์๊ฐ์ ๋ํ๋ ค๋ฉด ํ์ํ๋ค. ๋ค ์ถ๊ฐํด์ฃผ์.
Decode#
################ Decode Stage ###################################
## What register should be used as the A source?
## (iaddq takes its first operand from the constant word, not from a register,
## so nothing needs to be added here.)
word srcA = [
icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ } : rA;
icode in { IPOPQ, IRET } : RRSP;
1 : RNONE; # Don't need register
];
## What register should be used as the B source?
## (iaddq adds to rB, so IIADDQ has to join the first case.)
word srcB = [
icode in { IOPQ, IRMMOVQ, IMRMOVQ } : rB;
icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
1 : RNONE; # Don't need register
];
## What register should be used as the E destination?
## (The first case writes rB only when Cnd holds; the second case writes rB
## unconditionally, which is where IIADDQ has to be added.)
word dstE = [
icode in { IRRMOVQ } && Cnd : rB;
icode in { IIRMOVQ, IOPQ} : rB;
icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
1 : RNONE; # Don't write any register
];
## What register should be used as the M destination?
word dstM = [
icode in { IMRMOVQ, IPOPQ } : rA;
1 : RNONE; # Don't write any register
];- srcA, srcB, dstE, dstM์ค์์ ๋ญ๊ฐ ํ์ํ ๊น?
- ์๋ฌด๋๋ ๊ฐ์ ธ์ฌํ์ ์์ผ๋ srcA, srcB๋ ํ์ ์๋๊ฑฐ๊ฐ๊ณ , dst๋ง ์์ผ๋ฉด ๋ ๊ฒ๊ฐ๋ค.
- ์๋์ง, srcB์๋ค๊ฐ ๋ํด์ผํ๋๊ฑฐ๋๊น srcB๋ ๊ฐ์ ธ์์ผํ๋ค.
- ๋ฉ๋ชจ๋ฆฌ์์ ๊ธ์ด์จ๊ฒ ์๋๋ผ Execute์์ ์ป์ ์ ์๋ ๊ฐ์ด๋, dstE์ ์ถ๊ฐํ๋ฉด ๋๊ฒ ๋ค.
- ๊ทผ๋ฐ Cnd๋ ๋ญ์ง? condition์ค์ ํ๋์ธ๊ฑฐ๊ฐ์๋ฐ, ์ผ๋จ ๋ฌด์กฐ๊ฑด rB๋ ๋์ด์ผํ๋ ๋๋ฒ์งธ์ค์ ๋ฃ์.
Execute#
################ Execute Stage ###################################
## Select input A to ALU
## (iaddq supplies its constant here, so IIADDQ has to join the valC case.)
word aluA = [
icode in { IRRMOVQ, IOPQ } : valA;
icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ } : valC;
icode in { ICALL, IPUSHQ } : -8;
icode in { IRET, IPOPQ } : 8;
# Other instructions don't need ALU
];
## Select input B to ALU
## (iaddq adds to the register value, so IIADDQ has to join the valB case.)
word aluB = [
icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
IPUSHQ, IRET, IPOPQ } : valB;
icode in { IRRMOVQ, IIRMOVQ } : 0;
# Other instructions don't need ALU
];
## Set the ALU function
## (everything except IOPQ falls through to ALUADD, so no change is needed for iaddq)
word alufun = [
icode == IOPQ : ifun;
1 : ALUADD;
];
## Should the condition codes be updated?
bool set_cc = icode in { IOPQ };
- 계산이랑 조건비트 설정까지 다 해야하는 Execute 단계이다.
- aluA์ valC๋ฅผ, aluB์ valB๋ฅผ ๋ฃ์ด์ผ๊ฒ ์ง?
- alufun์ ์๋๊ณ , setcc๋ ์ผ์ผ๊ฒ ๋ค.
Memory#
- ์ฌ๊ธฐ์ ๋ญ ํ ๊ฒ ์์ฃ ๋ฉ๋ชจ๋ฆฌ์ ์ ๊ทผ์ ์ํ๋๋
Write back#
- ์? ์๋ณธ ์ฝ๋์๋ ์ฌ๊ธฐ๊ฐ ์๋ค. ์์์ ๋๋ ์์ญ์ธ๋ฏ?
Program Counter Update#
- ์ด๊ฒ๋ ์์ฐ์ค๋ฝ๊ฒ ์ฒ๋ฆฌ๋์ด์๋ค.
Test#
- ์์๊ฒ๋ค์ ์ถ๊ฐํ๋ฉด ๊ฐ๋จํ๊ฒ ์๋ฃ๋๋ค! ์๊ฐ๋ณด๋ค ์ฝ๋ค
Y86-64 Processor: seq-full.hcl
137 bytes of code read
IF: Fetched irmovq at 0x0. ra=----, rb=%rsp, valC = 0x100
IF: Fetched call at 0xa. ra=----, rb=----, valC = 0x38
Wrote 0x13 to address 0xf8
IF: Fetched irmovq at 0x38. ra=----, rb=%rdi, valC = 0x18
IF: Fetched irmovq at 0x42. ra=----, rb=%rsi, valC = 0x4
IF: Fetched call at 0x4c. ra=----, rb=----, valC = 0x56
Wrote 0x55 to address 0xf0
IF: Fetched xorq at 0x56. ra=%rax, rb=%rax, valC = 0x0
IF: Fetched andq at 0x58. ra=%rsi, rb=%rsi, valC = 0x0
IF: Fetched jmp at 0x5a. ra=----, rb=----, valC = 0x83
IF: Fetched jne at 0x83. ra=----, rb=----, valC = 0x63
IF: Fetched mrmovq at 0x63. ra=%r10, rb=%rdi, valC = 0x0
IF: Fetched addq at 0x6d. ra=%r10, rb=%rax, valC = 0x0
IF: Fetched iaddq at 0x6f. ra=----, rb=%rdi, valC = 0x8
IF: Fetched iaddq at 0x79. ra=----, rb=%rsi, valC = 0xffffffffffffffff
IF: Fetched jne at 0x83. ra=----, rb=----, valC = 0x63
IF: Fetched mrmovq at 0x63. ra=%r10, rb=%rdi, valC = 0x0
IF: Fetched addq at 0x6d. ra=%r10, rb=%rax, valC = 0x0
IF: Fetched iaddq at 0x6f. ra=----, rb=%rdi, valC = 0x8
IF: Fetched iaddq at 0x79. ra=----, rb=%rsi, valC = 0xffffffffffffffff
IF: Fetched jne at 0x83. ra=----, rb=----, valC = 0x63
IF: Fetched mrmovq at 0x63. ra=%r10, rb=%rdi, valC = 0x0
IF: Fetched addq at 0x6d. ra=%r10, rb=%rax, valC = 0x0
IF: Fetched iaddq at 0x6f. ra=----, rb=%rdi, valC = 0x8
IF: Fetched iaddq at 0x79. ra=----, rb=%rsi, valC = 0xffffffffffffffff
IF: Fetched jne at 0x83. ra=----, rb=----, valC = 0x63
IF: Fetched mrmovq at 0x63. ra=%r10, rb=%rdi, valC = 0x0
IF: Fetched addq at 0x6d. ra=%r10, rb=%rax, valC = 0x0
IF: Fetched iaddq at 0x6f. ra=----, rb=%rdi, valC = 0x8
IF: Fetched iaddq at 0x79. ra=----, rb=%rsi, valC = 0xffffffffffffffff
IF: Fetched jne at 0x83. ra=----, rb=----, valC = 0x63
IF: Fetched ret at 0x8c. ra=----, rb=----, valC = 0x0
IF: Fetched ret at 0x55. ra=----, rb=----, valC = 0x0
IF: Fetched halt at 0x13. ra=----, rb=----, valC = 0x0
32 instructions executed
Status = HLT
Condition Codes: Z=1 S=0 O=0
Changed Register State:
%rax: 0x0000000000000000 0x0000abcdabcdabcd
%rsp: 0x0000000000000000 0x0000000000000100
%rdi: 0x0000000000000000 0x0000000000000038
%r10: 0x0000000000000000 0x0000a000a000a000
Changed Memory State:
0x00f0: 0x0000000000000000 0x0000000000000055
0x00f8: 0x0000000000000000 0x0000000000000013
ISA Check Succeeds
./optest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
All 58 ISA Checks Succeed
./jtest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
All 96 ISA Checks Succeed
./ctest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
All 22 ISA Checks Succeed
./htest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
All 756 ISA Checks Succeed
Part C#
- ncopy.ys๊ฐ ์ต๋ํ ๋น ๋ฅด๊ฒ ์คํ๋๋๋ก ์ต์ ํํ๋ ๋จ๊ณ์ด๋ค.
- CPE๊ฐ 7.5 ์ดํ๋ก ๊ฐ๋๋ก!
- pipe-full.hcl๋ฅผ ์์ ํ๊ณ , ncopy.ys๋ฅผ ์์ ํ์.
make VERSION=full GUIMODE="" # after modifying pipe-full.hcl
make drivers # after modifying ncopy.ys
./correctness.pl # check correctness
./benchmark.pl # CPE ํ์ธ- ์ฒ์ CPE๋ 15.18์ด๋ค. ํ๋ด๋ณด์๊ณ
pipe-full.hcl#
- ์ผ๋จ CPU๋ถํฐ ์๋ณด์.
- ์ฌ๊ธฐ์๋ IIADDQ๋ฅผ ์ถ๊ฐํด์ผํ๋๊ฒ ๊ฐ๋ค.
Fetch#
- ์ผ๋จ f_PC๋ ์๊ด ์์ ๊ฒ ๊ฐ๋ค.
- f_icode, f_ifun๋ ๋ฌธ์ ์๊ณ ..
- instr_valid๋ ์ถ๊ฐํด์ผ๊ฒ ์ง.
- ์์ ๊ฐ์ด need_regids, need_valC๋ ๋ค ์ถ๊ฐํด์ผ๊ฒ ๋ค.
- f_predPC๋ ์ ํํ ์ผ ์์ผ๋๊น f_valP ๊ทธ๋๋ก!
Decode#
- ๋๊ฐ์ด d_srcB, d_dstE ๋๊ฐ๋ฅผ ๋จ๊ฒจ์ผํ ๊ฒ ๊ฐ๋ค.
- valA๋ฅผ ๊ฐ์ ธ์ฌ๋ ์ด์์ธ๋ฐ… ํฌ์๋ฉ์ ํด์ผ๋๋๋ฐ.
- valA์๋ Execute ๋จ๊ณ์์ valC๋ฅผ ๊ฐ์ ธ์ค๋ฉด ๋๊ณ , valB๋ ๋ก๊ฒจ์ฌํ
๋ฐ… ์ ์ด๋ฏธ ์ฝ๋ฉ์ด ๋์ด์๋ค.
- e_dstE, M_dstM, M_dstE, W_dstM, W_dstE ์์์์ ๋ก๊ฒจ์จ๋ค. ์๊ฑด๋๋ ค๋ ๋ ๋ฏ
Execute#
- aluA๋ E_valC์์ ๋ฐ๋ ค์ค๋๊ฑฐ๋, ๊ฑฐ๊ธฐ๋ค๊ฐ IIADDQ๋ฅผ ๋ฃ์ด์ฃผ์.
- aluB์๋ E_valB์์ ๊ฐ์ ธ์ค๊ฒ.
- alufun์ ์๊ด์๊ณ , setcc๊ฐ ์ข ์ด์์ธ๋ฐ…
## Should the condition codes be updated?
bool set_cc = E_icode == IOPQ &&
# State changes only during normal operation
!m_stat in { SADR, SINS, SHLT } && !W_stat in { SADR, SINS, SHLT };
- 원래 코드가 이렇게 되어있는데, E_icode == IOPQ 부분을 in 문법으로 바꾸면 될까?
bool set_cc = E_icode in { IOPQ, IIADDQ} &&
- 이렇게 한번 바꿔보자.
Memory#
- ํ ์ผ ์์ง ์์๊น?
Pipeline Register conrtrol#
- ๋ฒ๋ธ์ ๋ฃ์๊น ๋ง๊น์ธ๋ฐ, IADDQ๋ ๋ญ ์๊ด ์์์ง.
- stall๋ ํ ํ์ ์๊ณ .
test#
❯ cd ../y86-code; make testpsim
Makefile:42: warning: ignoring prerequisites on suffix rule definition
Makefile:45: warning: ignoring prerequisites on suffix rule definition
Makefile:48: warning: ignoring prerequisites on suffix rule definition
Makefile:51: warning: ignoring prerequisites on suffix rule definition
../pipe/psim -t asum.yo > asum.pipe
../pipe/psim -t asumr.yo > asumr.pipe
../pipe/psim -t cjr.yo > cjr.pipe
../pipe/psim -t j-cc.yo > j-cc.pipe
../pipe/psim -t poptest.yo > poptest.pipe
../pipe/psim -t pushquestion.yo > pushquestion.pipe
../pipe/psim -t pushtest.yo > pushtest.pipe
../pipe/psim -t prog1.yo > prog1.pipe
../pipe/psim -t prog2.yo > prog2.pipe
../pipe/psim -t prog3.yo > prog3.pipe
../pipe/psim -t prog4.yo > prog4.pipe
../pipe/psim -t prog5.yo > prog5.pipe
../pipe/psim -t prog6.yo > prog6.pipe
../pipe/psim -t prog7.yo > prog7.pipe
../pipe/psim -t prog8.yo > prog8.pipe
../pipe/psim -t ret-hazard.yo > ret-hazard.pipe
grep "ISA Check" *.pipe
asum.pipe:ISA Check Succeeds
asumr.pipe:ISA Check Succeeds
cjr.pipe:ISA Check Succeeds
j-cc.pipe:ISA Check Succeeds
poptest.pipe:ISA Check Succeeds
prog1.pipe:ISA Check Succeeds
prog2.pipe:ISA Check Succeeds
prog3.pipe:ISA Check Succeeds
prog4.pipe:ISA Check Succeeds
prog5.pipe:ISA Check Succeeds
prog6.pipe:ISA Check Succeeds
prog7.pipe:ISA Check Succeeds
prog8.pipe:ISA Check Succeeds
pushquestion.pipe:ISA Check Succeeds
pushtest.pipe:ISA Check Succeeds
ret-hazard.pipe:ISA Check Succeeds
rm asum.pipe asumr.pipe cjr.pipe j-cc.pipe poptest.pipe pushquestion.pipe pushtest.pipe prog1.pipe prog2.pipe prog3.pipe prog4.pipe prog5.pipe prog6.pipe prog7.pipe prog8.pipe ret-hazard.pipe
❯ cd ../ptest; make SIM=../pipe/psim TFLAGS=-i
./optest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
All 58 ISA Checks Succeed
./jtest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
All 96 ISA Checks Succeed
./ctest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
All 22 ISA Checks Succeed
./htest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
All 756 ISA Checks Succeed
- 구웃 잘 돌아간다
ncopy.ys#
- ์ผ๋จ ์๋ฌด๋๋ CPE๋ ๊ทธ๋๋ก 15.18์ด๋ค. ์ด๊ฑธ ์ ๋ฐ๊ฟ๋ณด์.
# You can modify this portion
# Register roles, as used by the code below:
#   %rdi = src pointer, %rsi = dst pointer, %rdx = remaining length,
#   %rax = count of values > 0 that were copied, %r10 = scratch.
# Every constant (1, 8) is first loaded into %r10 with irmovq and then
# combined with addq/subq.
# Loop header
xorq %rax,%rax # count = 0;
andq %rdx,%rdx # len <= 0?
jle Done # if so, goto Done:
Loop: mrmovq (%rdi), %r10 # read val from src...
rmmovq %r10, (%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos # if so, goto Npos:
irmovq $1, %r10
addq %r10, %rax # count++
Npos: irmovq $1, %r10
subq %r10, %rdx # len--
irmovq $8, %r10
addq %r10, %rdi # src++
addq %r10, %rsi # dst++
andq %rdx,%rdx # len > 0?
jg Loop # if so, goto Loop:
- 우리가 바꿀 수 있는건 이부분인데..
IIADDQ#
- irmovq ๋ฅผ ์ด์ฉํด์ ๋ ์ง์คํธ๋ฆฌ์ ๊ฐ์ ๋ฃ๊ณ , ๊ทธ๊ฑธ addq๋ก ์ฐ์ฐํ๋ ๋ถ๋ถ์ด ๋๊ตฐ๋ฐ ๋ณด์ธ๋ค.
- ์ด๊ฑธ iaddq๋ก ์ค์ผ ์ ์๊ฒ ๋ค.
- ์ด๊ฑธ ์ํํ๋๋ CPE๊ฐ 15.18 -> 13.70์ผ๋ก ์ค์๋ค. ๊ฐ๊ธธ์ด ๋ฉ์ด๋ณด์ธ๋ค..
- ์ด๋ผ, Npos์๋ $-1๋ก len–๋ฅผ ๊ตฌํํ ์ ์์๋ค.
- ๋ฑ ํ๋ ์ค์ด์ 12.70์ด ๋๋ค.
์์ ๋ฐ๊พธ๊ธฐ?#
- mmmovq๊ฐ ์์ผ๋, mrmovq, rmmovq๋ก ๋ฐ์ดํฐ๋ฅผ ๋ณต์ฌํ๊ณ ์๋ค.
- ๊ทธ๋ฐ๋ฐ ์ด๋ ๋ฉ๋ชจ๋ฆฌ ์ฐธ์กฐ์์ ํ์๋๊ฐ ์ผ์ด๋์, bubble / stall์ด ๋ค์ด๊ฐ์ง ์์๋๊ฐ?
- ์ด ์ฌ์ด์ andq๋ฅผ ๋ฃ์ด๋ณผ๊น?
- ์ ์ด๊ฒ๋ง์ผ๋ก๋ ๊ฐ๋ค… ์๋๋น
# Experiment: test the loaded value before storing it.
Loop: mrmovq (%rdi), %r10 # read val from src...
andq %r10, %r10 # val <= 0? (uses %r10 in the instruction right after the load)
rmmovq %r10, (%rsi) # ...and store it to dst (a store does not update the condition codes)
jle Npos # if so, goto Npos: (still decided by the andq above)
iaddq $1, %rax # count++- ์ํ, ์๋๋๊ฒ ์๋๋ผ ์ด๋ ๊ฒ ์์ ํ๊ฑฐ์๋๋ฐ, andq์์๋ ๊ฒฐ๊ตญ %r10์ด ํ์ํด์ ์ด์๊ฐ ์๊ธด๋ค.
- ๊ทธ๋ฐ๋ฐ ์ฌ์ด์ ๋ ๋ผ์ธ์์๋๊ฒ ์๋๋ฐ..
์ด์ ๋ธ๋ฆฌ ์ต์ ํ#
- ์ฝ๋ ์์ฒด๋ฅผ ์ต์ ํํด๋ณผ๊น?
- ์ ํ๊ฐ์๊ฑธ ์ ์์จ ์ ์์๊ฑฐ๊ฐ์๋ฐ. ์ ํ ์ ์ค๋ฅ ์ ์ฌ๋ ค๋ณด์.
- Y86-64๋ ์ธ์ ๋ ์ ํ๋ฅผ ์ํ๋ค๊ณ ์๊ฐํ๊ณ ์์ง์ธ๋ค.
# Experiment: move the count++ out of line so that the non-positive case
# falls straight through to Npos.
ncopy:
xorq %rax,%rax # count = 0;
andq %rdx,%rdx # len <= 0?
jle Done # if so, goto Done:
Loop:
mrmovq (%rdi), %r10 # read val from src...
andq %r10, %r10 # val <= 0?
rmmovq %r10, (%rsi) # ...and store it to dst
# jle Npos # if so, goto Npos:
# iaddq $1, %rax # count++
jg plus1 # val > 0: take the out-of-line increment
Npos:
iaddq $-1, %rdx # len--
iaddq $8, %rdi # src++
iaddq $8, %rsi # dst++
andq %rdx,%rdx # len > 0?
jle Done
jmp Loop # if so, goto Loop:
# Out-of-line count++; jumps back to Npos afterwards.
plus1:
iaddq $1, %rax
jmp Npos
- 쩝 이런느낌으로 해볼까 했는데, 오히려 명령어가 늘어서 15를 넘겨버렸다. 어떻게 하면 좋지? 점프를 덜타야할거같은데
๋ฒํท์ง#
- ์ ๊ณฑ๊ทผ๋ถํ ๋ฒ์์ ํ๋๊ฒ์ฒ๋ผ, ๋ฒํท์ง๋ก ๋ฌถ์ด์ ์ฒ๋ฆฌํ ์ ์์ง ์์๊น?
- ๊ทธ๋ ๊ฒํ๋ฉด mrmovq ํด์ ๋๋ ์์จ ์ ์์ง ์์๊น?
# Experiment: handle two elements per pass (%r10 = first value, %r11 = second).
# NOTE: every pass copies two elements and only afterwards tests len > 0, so
# entering with exactly one element left still copies two.
Loop:
mrmovq (%rdi), %r10 # read val from src...
mrmovq 8(%rdi), %r11 # src+1
rmmovq %r10, (%rsi) # ...and store it to dst
rmmovq %r11, 8(%rsi) # src+1 to dst+1
andq %r10, %r10 # val <= 0?
jle Npos # if so, goto Npos:
iaddq $1, %rax # count++
Npos:
andq %r11, %r11 # second val <= 0?
jle Npos2
iaddq $1, %rax # count++
Npos2:
iaddq $-2, %rdx # len -= 2
iaddq $16, %rdi # src += 2
iaddq $16, %rsi # dst += 2
andq %rdx,%rdx # len > 0?
jg Loop # if so, goto Loop:
- 아 다 좋은데.. 이게 짝수개일때만 동작한다. 홀수개이면 어카지? Loop2를 만들까?
- Loop๋ก๋ len > 1์ผ๋๋ง ๋ณด๋ด์.
# Two elements per pass, with a single-element tail (move1).
# %rdx holds (remaining - 1) while the loop runs: negative means nothing is
# left, zero means exactly one element is left, positive means at least two.
ncopy:
xorq %rax,%rax # count = 0;
iaddq $-1, %rdx # rdx = len - 1
jl Done # len <= 0
Loop:
je move1 # exactly one element left (flags come from the last iaddq on %rdx)
mrmovq (%rdi), %r10 # read val from src...
mrmovq 8(%rdi), %r11 # src+1
rmmovq %r10, (%rsi) # ...and store it to dst
rmmovq %r11, 8(%rsi) # src+1 to dst+1
andq %r10, %r10 # val <= 0?
jle Npos # if so, goto Npos:
iaddq $1, %rax # count++
Npos:
andq %r11, %r11 # second val <= 0?
jle Npos2
iaddq $1, %rax # count++
Npos2:
iaddq $16, %rdi # src += 2
iaddq $16, %rsi # dst += 2
iaddq $-2, %rdx # len -= 2
jge Loop # one or more elements left: Loop's je picks the tail when it is exactly one
jmp Done
# Tail: copy the single remaining element.
move1:
mrmovq (%rdi), %r10 # read val from src...
rmmovq %r10, (%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Done # if so, skip the final count++
iaddq $1, %rax # count++
- 오!! 이렇게하니까 된다!!
- CPE 10.08๊น์ง ์ค์๋ค.
- ๋ฐฐ์น๊ฐ 0~64๊ฐ๊น์ง ์์ผ๋๊น, ๋์ถฉ 8๊ฐ์ ๋๊ฐ ์ ๊ณฑ๊ทผ ๋ถํ ๋ฒ์ ๋ง๋๊ฑฐ๊ฐ์ง๋ง..
- %r10๋ถํฐ ์์ํ๋, %r10~r15๊น์ง 6๊ฐ๋ฅผ ์จ์ ํด๋ณผ๊น? ํ์ ์ฝ๋๊ฐ ๊ฐ๊ธธ์ด์ง๋ค
# Six elements per pass; `router` dispatches the 0..5 leftover elements.
ncopy:
xorq %rax,%rax # count = 0;
isbig:
iaddq $-6, %rdx # rdx = len - 6
jge Loop # at least six elements: take the unrolled loop
iaddq $6, %rdx # fewer than six: undo the subtraction
router: # check remainder 0 to 5
iaddq $-1, %rdx # rdx = rem - 1
jl Done # rem <= 0
je move1 # rem == 1
iaddq $-2, %rdx # rdx = rem - 3
jl move2 # rem == 2
je move3 # rem == 3
iaddq $-2, %rdx # rdx = rem - 5
jl move4 # rem == 4
je move5 # rem == 5 (router is only reached with fewer than six left, so one of these branches is always taken)
# Unrolled body: load six values, store six values, then count the positives.
# NOTE(review): %r12-%r14 are callee-saved under the x86-64 convention and are
# overwritten here without being saved -- confirm callers do not rely on them.
Loop:
mrmovq (%rdi), %r10
mrmovq 8(%rdi), %r11
mrmovq 16(%rdi), %r12
mrmovq 24(%rdi), %r13
mrmovq 32(%rdi), %r14
mrmovq 40(%rdi), %r9
rmmovq %r10, (%rsi)
rmmovq %r11, 8(%rsi)
rmmovq %r12, 16(%rsi)
rmmovq %r13, 24(%rsi)
rmmovq %r14, 32(%rsi)
rmmovq %r9, 40(%rsi)
# One test per loaded value: count++ when the value is > 0.
Npos_loop1:
andq %r10, %r10
jle Npos_loop2
iaddq $1, %rax
Npos_loop2:
andq %r11, %r11
jle Npos_loop3
iaddq $1, %rax
Npos_loop3:
andq %r12, %r12
jle Npos_loop4
iaddq $1, %rax
Npos_loop4:
andq %r13, %r13
jle Npos_loop5
iaddq $1, %rax
Npos_loop5:
andq %r14, %r14
jle Npos_loop6
iaddq $1, %rax
Npos_loop6:
andq %r9, %r9
jle Loop_end
iaddq $1, %rax
Loop_end:
iaddq $48, %rdi # src += 6
iaddq $48, %rsi # dst += 6
iaddq $-6, %rdx # len -= 6
jge Loop # six or more still left
iaddq $6, %rdx # restore the true remainder for router
jmp router
# Tails: moveN copies the N leftover elements, then enters the shared
# move_check chain at the test for the highest register it loaded.
# NOTE(review): %r12-%r14 are callee-saved under the x86-64 convention and are
# overwritten here without being saved -- confirm callers do not rely on them.
move1:
mrmovq (%rdi), %r10
rmmovq %r10, (%rsi)
jmp move_check10
move2:
mrmovq (%rdi), %r10
mrmovq 8(%rdi), %r11
rmmovq %r10, (%rsi)
rmmovq %r11, 8(%rsi)
jmp move_check11
move3:
mrmovq (%rdi), %r10
mrmovq 8(%rdi), %r11
mrmovq 16(%rdi), %r12
rmmovq %r10, (%rsi)
rmmovq %r11, 8(%rsi)
rmmovq %r12, 16(%rsi)
jmp move_check12
move4:
mrmovq (%rdi), %r10
mrmovq 8(%rdi), %r11
mrmovq 16(%rdi), %r12
mrmovq 24(%rdi), %r13
rmmovq %r10, (%rsi)
rmmovq %r11, 8(%rsi)
rmmovq %r12, 16(%rsi)
rmmovq %r13, 24(%rsi)
jmp move_check13
move5:
mrmovq (%rdi), %r10
mrmovq 8(%rdi), %r11
mrmovq 16(%rdi), %r12
mrmovq 24(%rdi), %r13
mrmovq 32(%rdi), %r14
rmmovq %r10, (%rsi)
rmmovq %r11, 8(%rsi)
rmmovq %r12, 16(%rsi)
rmmovq %r13, 24(%rsi)
rmmovq %r14, 32(%rsi)
# move5 falls through into the start of the chain.
# Chain: test %r14, %r13, %r12, %r11, %r10 in turn; count++ for each value > 0.
move_check14:
andq %r14, %r14
jle move_check13
iaddq $1, %rax
move_check13:
andq %r13, %r13
jle move_check12
iaddq $1, %rax
move_check12:
andq %r12, %r12
jle move_check11
iaddq $1, %rax
move_check11:
andq %r11, %r11
jle move_check10
iaddq $1, %rax
move_check10:
andq %r10, %r10
jle Done
iaddq $1, %rax
- 열심히 했더니 8.01까지 줄었다!!
์ด๋ถ ํ์#
# Dispatch on the remainder by comparing against 3 first.
router_tree: # now 0 <= rdx <= 5
iaddq $-3, %rdx # rdx = rem - 3
je move3 # rem == 3
jg router_tree_R # rem is 4 or 5
router_tree_L: # rem < 3
iaddq $2, %rdx # rdx = rem - 1
jl Done # rem == 0
je move1 # rem == 1
jg move2 # rem == 2
router_tree_R: # rem > 3
iaddq $-1, %rdx # rdx = rem - 4
je move4 # rem == 4
jg move5
- 라우터를 이분탐색으로 해봤고, 7.80까지 줄일 수 있었다.
- move ๋ก์ง๋ ์ข ์์ ํ๋ค.
##################################################################
# You can modify this portion
# Loop header
xorq %rax,%rax # count = 0;
isbig:
iaddq $-6, %rdx # rdx = len - 6
jge Loop # at least six elements: take the unrolled loop
# router is entered with rdx = rem - 6, where rem is the number of elements
# still to copy; the +6 correction is folded into the first comparison.
router:
iaddq $3, %rdx # rdx = rem - 3
jl router_L # rem < 3
je move3 # rem == 3
jg router_R # rem is 4 or 5
router_L:
iaddq $2, %rdx # rdx = rem - 1
jl Done # rem <= 0
je move1 # rem == 1
jmp move2 # rem == 2
router_R:
iaddq $-1, %rdx # rdx = rem - 4
je move4 # rem == 4
jmp move5 # rem == 5
# Unrolled body: load six values first, then for each one test it, store it,
# and count++ when it is > 0. The store sits between the test and the branch;
# a store does not update the condition codes, so each jle is still decided
# by the andq before it.
# NOTE(review): %r12-%r14 are callee-saved under the x86-64 convention and are
# overwritten here without being saved -- confirm callers do not rely on them.
Loop:
mrmovq (%rdi), %r10
mrmovq 8(%rdi), %r11
mrmovq 16(%rdi), %r12
mrmovq 24(%rdi), %r13
mrmovq 32(%rdi), %r14
mrmovq 40(%rdi), %r9
Npos_loop1:
andq %r10, %r10
rmmovq %r10, (%rsi)
jle Npos_loop2
iaddq $1, %rax
Npos_loop2:
andq %r11, %r11
rmmovq %r11, 8(%rsi)
jle Npos_loop3
iaddq $1, %rax
Npos_loop3:
andq %r12, %r12
rmmovq %r12, 16(%rsi)
jle Npos_loop4
iaddq $1, %rax
Npos_loop4:
andq %r13, %r13
rmmovq %r13, 24(%rsi)
jle Npos_loop5
iaddq $1, %rax
Npos_loop5:
andq %r14, %r14
rmmovq %r14, 32(%rsi)
jle Npos_loop6
iaddq $1, %rax
Npos_loop6:
andq %r9, %r9
rmmovq %r9, 40(%rsi)
jle Loop_end
iaddq $1, %rax
Loop_end:
iaddq $48, %rdi # src += 6
iaddq $48, %rsi # dst += 6
iaddq $-6, %rdx # len -= 6
jge Loop # six or more still left
jmp router # rdx stays at rem - 6, which is what router expects
# Tails: moveN only loads the N leftover values, then enters the shared
# move_check chain at the test for the highest register it loaded. The chain
# stores each value as well as counting it.
# NOTE(review): %r12-%r14 are callee-saved under the x86-64 convention and are
# overwritten here without being saved -- confirm callers do not rely on them.
move1:
mrmovq (%rdi), %r10
jmp move_check10
move2:
mrmovq (%rdi), %r10
mrmovq 8(%rdi), %r11
jmp move_check11
move3:
mrmovq (%rdi), %r10
mrmovq 8(%rdi), %r11
mrmovq 16(%rdi), %r12
jmp move_check12
move4:
mrmovq (%rdi), %r10
mrmovq 8(%rdi), %r11
mrmovq 16(%rdi), %r12
mrmovq 24(%rdi), %r13
jmp move_check13
move5:
mrmovq (%rdi), %r10
mrmovq 8(%rdi), %r11
mrmovq 16(%rdi), %r12
mrmovq 24(%rdi), %r13
mrmovq 32(%rdi), %r14
# move5 falls through into the start of the chain.
# Chain: for %r14, %r13, %r12, %r11, %r10 in turn: test, store to the matching
# dst slot, and count++ when the value is > 0 (the store does not update the
# condition codes, so each jle is decided by the andq before it).
move_check14:
andq %r14, %r14
rmmovq %r14, 32(%rsi)
jle move_check13
iaddq $1, %rax
move_check13:
andq %r13, %r13
rmmovq %r13, 24(%rsi)
jle move_check12
iaddq $1, %rax
move_check12:
andq %r12, %r12
rmmovq %r12, 16(%rsi)
jle move_check11
iaddq $1, %rax
move_check11:
andq %r11, %r11
rmmovq %r11, 8(%rsi)
jle move_check10
iaddq $1, %rax
move_check10:
andq %r10, %r10
rmmovq %r10, (%rsi)
jle Done
iaddq $1, %rax
- 7.73정도까지 줄였는데.. 진짜 더는 못하겠다. 멀 해도 안줄어든다 ㅠ.ㅠ 여기서 포기
