# Site-specific settings -- adapt these to your installation.

# Root of the source tree, relative to the directory make runs in.
TOPDIR ?= ..

# Per-site configuration. The include is mandatory: make stops if the file is
# missing. It presumably supplies CUDA_DIR, STXXL_DIR, BOOST_DIR and
# LIBSPE2_DIR, which are used below but never assigned in this file -- verify.
include $(TOPDIR)/Makefile.local

# Linker switch used to link some uncommon libs statically (boost png z).
STATIC = -Wl,-Bstatic

# Host C++ compiler.
# Only use -DTMTO_USE_COMPRESSED_PAIR with g++-4.1.
# NOTE(review): a trailing comment on the assignment used to spell the flag
# -DTMTO_USE_BOOST_COMPRESSED_PAIR; confirm which spelling the sources test.
# The note is kept off the assignment line on purpose: whitespace in front of
# a trailing "#" becomes part of the variable's value.
GXX ?= g++-4.1

# PPE compiler (ppu-g++ in 64-bit mode).
GXX_PPE = ppu-g++ -m64
| 14 | |
|---|
# Nothing below this line should need editing.

# Header search paths, one variable per external package.
#SDK_INC=-I$(SDK_DIR)/common/inc
CUDA_INC    = -I$(CUDA_DIR)/include
STXXL_INC   = -I$(STXXL_DIR)/include
LIBSPE2_INC = -I$(LIBSPE2_DIR) -I$(LIBSPE2_DIR)/spebase

# Library search paths.
#SDK_LIBS=-L$(SDK_DIR)/lib
STXXL_LIBS = -L$(STXXL_DIR)/lib

# nvcc invocation shared by every CUDA rule: -keep retains intermediate files,
# ptxas is run verbose with a cap of 128 registers, and NVCC_CFLAGS (not set
# in this file) is appended.
NVCC = $(CUDA_DIR)/bin/nvcc -keep --ptxas-options="-v --maxrregcount=128" $(NVCC_CFLAGS)

# Comparison command used by the test targets; trailing CRs are ignored.
DIFF = diff --strip-trailing-cr
| 26 | |
|---|
# Boost locations. With BOOST_DIR empty both variables stay empty, so only the
# toolchain's default search paths are used.
ifeq ($(BOOST_DIR),)
BOOST_INC =
BOOST_LIBDIR =
else
BOOST_INC = -I$(BOOST_DIR)
BOOST_LIBDIR = -L$(BOOST_DIR)/stage/lib
endif
| 38 | |
|---|
# First rule in this file: "all" builds the combined binary "c".
all: c

# Pull in the generated header dependencies (written by the "depend" target).
# A missing .depend is silently ignored.
-include .depend
| 42 | |
|---|
# Header search path handed to every compile step.
INC = -I$(TOPDIR) $(CUDA_INC) $(STXXL_INC) $(BOOST_INC) $(LIBSPE2_INC)

# Preprocessor defines shared by all compilers. M32 is not set in this file.
# NOTE(review): the STXXL_BOOST_* defines presumably select STXXL's Boost-based
# back ends -- confirm against the STXXL version in use.
CFLAGS = $(M32) \
         -DSTXXL_BOOST_THREADS -DSTXXL_BOOST_CONFIG -DSTXXL_BOOST_FILESYSTEM \
         -DSTXXL_BOOST_RANDOM -DSTXXL_BOOST_TIMESTAMP

# Link line. Everything between $(STATIC) and -Wl,-Bdynamic is linked
# statically; libgcc_s and pthread are linked dynamically again.
# -L$(CUDA_DIR)/lib used to be listed twice (before and after lib64); the
# second copy had no effect on the search order and has been dropped.
LIBS = $(STXXL_LIBS) -L$(LIBSPE2_DIR) -L$(CUDA_DIR)/lib -L$(CUDA_DIR)/lib64 \
       $(BOOST_LIBDIR) \
       $(STATIC) \
       -lboost_program_options$(BOOST_INFIX)-mt -lstxxl \
       -lboost_regex$(BOOST_INFIX)-mt -lboost_thread$(BOOST_INFIX)-mt \
       -lboost_system$(BOOST_INFIX)-mt -lboost_filesystem$(BOOST_INFIX)-mt \
       -lboost_iostreams$(BOOST_INFIX)-mt \
       -Wl,-Bdynamic -lgcc_s -lpthread
| 47 | |
|---|
# Targets that name commands rather than files. Without this declaration a
# stray file called e.g. "clean" or "test2" would make the target look up to
# date and silently skip its recipe.
# calculate_chain_cuda.E is kept from the original list even though no rule
# for it is visible here.
.PHONY: all clean depend nvcc_gcc_script source_files_list \
        test test2 test3 test4 test5 test6 test23 test24 \
        calculate_chain.E calculate_chain_cuda.E \
        calculate_chain_cuda_bitslice.E cuda_bitslice_kernel.E
| 49 | |
|---|
# Remove build products.
# The three nvcc calls repeat the compile command lines with -clean, which
# makes nvcc delete the files such a compile would have produced (including
# those retained by -keep) instead of building anything. They are prefixed
# with "-" so that a missing or failing nvcc does not stop the final rm.
# rm uses -f so that cleaning an unbuilt or partly built tree succeeds.
clean:
	-PATH=.:$$PATH $(NVCC) -clean -keep -c $(INC) -o calculate_chain_cuda.o $(TOPDIR)/calculate_chain_cuda.cu
	-PATH=.:$$PATH $(NVCC) -clean -keep -c $(INC) -o cuda_localmem_kernel.o $(TOPDIR)/plugins/A51/cuda_localmem_kernel.cu
	-PATH=.:$$PATH $(NVCC) -clean -keep -c $(INC) -o cuda_bitslice_kernel.o $(TOPDIR)/plugins/A51/cuda_bitslice_kernel.cu
	rm -f c *.o refa51 c_cuda c_sse
| 55 | |
|---|
# Placeholder rule: it has no prerequisites and its recipe only prints an
# empty line, so make never stops with "No rule to make target" for this name.
# NOTE(review): presumably a leftover for a file still named in .depend --
# confirm before removing.
calculate_chain_methods.cu: ; echo
| 58 | |
|---|
# Write ./gcc, a two-line sh wrapper that execs $(GXX) with all arguments it
# receives. The nvcc rules in this file run with "." first in PATH,
# presumably so that nvcc picks this wrapper up as its host compiler.
#
# This rule used to list "gcc" as a prerequisite. No rule builds a file of
# that name -- the recipe below is what creates it -- so on a clean tree make
# stopped with "No rule to make target 'gcc'". The prerequisite is dropped;
# the wrapper is rewritten on every invocation, as the other rules do.
nvcc_gcc_script:
	(echo "#!/bin/sh"; echo "exec $(GXX) \"\$$@\"") > gcc
	chmod 755 gcc
| 62 | |
|---|
# Compile the CUDA chain calculation with nvcc.
# Like every other nvcc rule in this file, the ./gcc wrapper around $(GXX) is
# (re)written first; previously this rule relied on some other rule having
# created it, although it already ran nvcc with "." first in PATH.
calculate_chain_cuda.o: $(TOPDIR)/calculate_chain_cuda.cu
	(echo "#!/bin/sh"; echo "exec $(GXX) \"\$$@\"") > gcc
	chmod 755 gcc
	PATH=.:$$PATH $(NVCC) -c -keep $(CFLAGS) $(INC) -o $@ $<
| 65 | |
|---|
# Location of the NVIDIA CUDA SDK. The default is the path that used to be
# hard-coded in the recipe below; override it in Makefile.local, the
# environment or on the command line.
SDK_DIR ?= /home/sascha/NVIDIA_CUDA_SDK

# Build the stand-alone bitsliced CUDA program (compile and link in one nvcc
# call, with _DEBUG defined). The ./gcc wrapper around $(GXX) is rewritten
# first and found through the "." entry prepended to PATH.
calculate_chain_cuda_bitsliced: $(TOPDIR)/calculate_chain_cuda_bitsliced.cu
	(echo "#!/bin/sh"; echo "exec $(GXX) \"\$$@\"") > gcc
	chmod 755 gcc
	PATH=.:$$PATH $(NVCC) -D_DEBUG -I$(SDK_DIR)/common/inc -keep $(CFLAGS) $(INC) -o $@ $<
| 70 | |
|---|
# Debug helper: run only the preprocessor (-E) over the bitsliced CUDA source.
# No file named after the target is written, so the recipe runs every time.
# The ./gcc wrapper around $(GXX) is regenerated before nvcc is started.
calculate_chain_cuda_bitslice.E: $(TOPDIR)/calculate_chain_cuda_bitsliced.cu
	{ echo "#!/bin/sh"; echo "exec $(GXX) \"\$$@\""; } > gcc
	chmod 755 gcc
	PATH=.:$$PATH $(NVCC) -E -keep $(CFLAGS) $(INC) $<
| 75 | |
|---|
# calculate_chain.cpp is compiled twice: once with the host compiler and once
# with the PPE compiler. Only the compiler differs, so it is selected through
# a target-specific variable and both objects share a single recipe.
calculate_chain.o:     chain_cxx = $(GXX)
calculate_chain_ppe.o: chain_cxx = $(GXX_PPE)

calculate_chain.o calculate_chain_ppe.o: $(TOPDIR)/calculate_chain.cpp
	$(chain_cxx) $(HOST_CFLAGS) $(CFLAGS) $(INC) -c -o $@ $<
| 81 | |
|---|
# SPE tool names; the defaults are the commands that used to be hard-coded in
# the recipes below.
SPU_GXX ?= spu-g++
PPU_EMBEDSPU ?= ppu-embedspu

# Step 1: compile the SPE source. -save-temps keeps the compiler's
# intermediate files. (The original command passed -c twice; once is enough.)
calculate_chain_spe.o: $(TOPDIR)/calculate_chain_spe.cpp
	$(SPU_GXX) -c $(HOST_CFLAGS) $(CFLAGS) -save-temps $(INC) -o $@ $<

# Step 2: link the object into an SPE executable.
calculate_chain_spe.elf: calculate_chain_spe.o
	$(SPU_GXX) $(HOST_CFLAGS) $(CFLAGS) -o $@ $<

# Step 3: wrap the SPE executable into an object file, passing "spe_program"
# as the symbol name argument, so it can be linked into c_ppe.
calculate_chain_spe_embed.o: calculate_chain_spe.elf
	$(PPU_EMBEDSPU) spe_program $< $@
| 90 | |
|---|
# Stand-alone helper programs, each built from a single source file.

# Reference A5/1 program; test23 and test24 run it against c_cuda's output.
refa51: $(TOPDIR)/reference/a51.cpp
	$(GXX) -O3 -o $@ $^

# readahead tool. Built with plain g++ (not $(GXX)) and linked with pthread.
readahead: $(TOPDIR)/tools/readahead.cpp
	g++ -o $@ $^ -lpthread
| 96 | |
|---|
# Host-compiled A51 plugin objects. Each NAME_plugin.o is built from
# plugins/A51/NAME.cpp with the same command, so one static pattern rule
# replaces the four copies of the rule.
a51_host_plugin_objs := main_plugin.o work_generators_plugin.o \
                        work_consumers_plugin.o work_intermediates_plugin.o

$(a51_host_plugin_objs): %_plugin.o: $(TOPDIR)/plugins/A51/%.cpp
	$(GXX) $(HOST_CFLAGS) $(CFLAGS) $(INC) -c -o $@ $<
| 108 | |
|---|
# Plugin objects compiled with the PPE compiler; both are linked into c_ppe.
# The shared compile command lives in a canned recipe.
define ppe_compile
$(GXX_PPE) $(HOST_CFLAGS) $(CFLAGS) $(INC) -c -o $@ $<
endef

main_plugin_ppe.o: $(TOPDIR)/plugins/A51/main.cpp
	$(ppe_compile)

spe_bitslice_plugin.o: $(TOPDIR)/plugins/A51/spe_bitslice.cpp
	$(ppe_compile)
| 114 | |
|---|
# SSE bitslice implementation: the plugin object and its kernel object.
# Only the kernel gets the extra SSE_KERNEL_CFLAGS (not set in this file).
sse_bitslice_plugin.o: %_plugin.o: $(TOPDIR)/plugins/A51/%.cpp
	$(GXX) $(HOST_CFLAGS) $(CFLAGS) $(INC) -c -o $@ $<

sse_bitslice_kernel.o: %.o: $(TOPDIR)/plugins/A51/%.cpp
	$(GXX) $(SSE_KERNEL_CFLAGS) $(HOST_CFLAGS) $(CFLAGS) $(INC) -c -o $@ $<
| 120 | |
|---|
# Host-side (C++) halves of the two CUDA implementations. The device code is
# built separately from the matching *_kernel.cu files.
cuda_localmem_plugin.o cuda_bitslice_plugin.o: %_plugin.o: $(TOPDIR)/plugins/A51/%.cpp
	$(GXX) $(HOST_CFLAGS) $(CFLAGS) $(INC) -c -o $@ $<
| 126 | |
|---|
# Compile the bitslice CUDA kernel, hiding nvcc's
# "Cannot tell what pointer points to" warnings.
#
# The output used to be piped straight into "grep -v", which made grep's exit
# status the recipe's status: a failed compile looked successful whenever any
# line was printed, and a compile with nothing left to print looked like a
# failure. The output is now captured in a scratch file, nvcc's own status is
# remembered, the filtered text is printed, and that status is returned.
# The ./gcc wrapper around $(GXX) is rewritten first, as in the other nvcc
# rules.
cuda_bitslice_kernel.o: $(TOPDIR)/plugins/A51/cuda_bitslice_kernel.cu
	(echo "#!/bin/sh"; echo "exec $(GXX) \"\$$@\"") > gcc
	chmod 755 gcc
	PATH=.:$$PATH $(NVCC) $(INC) $(CFLAGS) -c -o $@ $< > $@.log 2>&1; \
	status=$$?; \
	grep -v 'Warning: Cannot tell what pointer points to' $@.log; \
	rm -f $@.log; \
	exit $$status
| 131 | |
|---|
# Debug helper: print the preprocessed bitslice CUDA kernel, minus nvcc's
# "Cannot tell what pointer points to" warnings. No file named after the
# target is kept.
#
# nvcc's output is captured in a scratch file instead of being piped into
# "grep -v", so the recipe returns nvcc's exit status rather than grep's
# (which depends only on whether any line survived the filter).
cuda_bitslice_kernel.E: $(TOPDIR)/plugins/A51/cuda_bitslice_kernel.cu
	(echo "#!/bin/sh"; echo "exec $(GXX) \"\$$@\"") > gcc
	chmod 755 gcc
	PATH=.:$$PATH $(NVCC) $(INC) $(CFLAGS) -E $< > $@.out 2>&1; \
	status=$$?; \
	grep -v 'Warning: Cannot tell what pointer points to' $@.out; \
	rm -f $@.out; \
	exit $$status
| 136 | |
|---|
# Compile the local-memory CUDA kernel. The ./gcc wrapper around $(GXX) is
# regenerated first; nvcc then runs with "." prepended to PATH.
cuda_localmem_kernel.o: $(TOPDIR)/plugins/A51/cuda_localmem_kernel.cu
	{ echo "#!/bin/sh"; echo "exec $(GXX) \"\$$@\""; } > gcc
	chmod 755 gcc
	PATH=.:$$PATH $(NVCC) $(INC) $(CFLAGS) -c -o $@ $<
| 141 | |
|---|
# CUDA device plugin object. Note the irregular naming: the source file is
# cuda.cpp, not cuda_device.cpp, so the NAME_plugin.o <- NAME.cpp pattern
# followed by the other plugin objects does not apply here.
cuda_device_plugin.o: $(TOPDIR)/plugins/A51/cuda.cpp
	$(GXX) $(HOST_CFLAGS) $(CFLAGS) $(INC) -c -o $@ $<
| 144 | |
|---|
# Regenerate .depend, the header dependency list read back at the top of this
# file. The first command truncates the file; every later one appends.
# Each -MT names the object file the dependencies belong to.
#
# Changes from the original recipe:
#  * spe_bitslice_plugin.o is scanned with $(GXX_PPE), the compiler that
#    actually builds it, instead of $(GXX).
#  * the loop over the .cu kernels (scanned as C++ via "-x c++") now stops at
#    the first failure; before, only the last iteration's status counted.
# The final spu-g++ call has no -MT, so the compiler's default target name is
# used for calculate_chain_spe.cpp.
depend:
	$(GXX) $(INC) -M -MT calculate_chain.o $(TOPDIR)/calculate_chain.cpp > .depend
	$(GXX_PPE) $(INC) -M -MT calculate_chain_ppe.o $(TOPDIR)/calculate_chain.cpp >> .depend
	$(GXX) $(INC) -M -MT main_plugin.o $(TOPDIR)/plugins/A51/main.cpp >> .depend
	$(GXX) $(INC) -M -MT work_generators_plugin.o $(TOPDIR)/plugins/A51/work_generators.cpp >> .depend
	$(GXX) $(INC) -M -MT work_consumers_plugin.o $(TOPDIR)/plugins/A51/work_consumers.cpp >> .depend
	$(GXX) $(INC) -M -MT work_intermediates_plugin.o $(TOPDIR)/plugins/A51/work_intermediates.cpp >> .depend
	$(GXX_PPE) $(INC) -M -MT main_plugin_ppe.o $(TOPDIR)/plugins/A51/main.cpp >> .depend
	$(GXX) $(INC) -M -MT cuda_localmem_plugin.o $(TOPDIR)/plugins/A51/cuda_localmem.cpp >> .depend
	$(GXX) $(INC) -M -MT cuda_bitslice_plugin.o $(TOPDIR)/plugins/A51/cuda_bitslice.cpp >> .depend
	$(GXX) $(INC) -M -MT cuda_device_plugin.o $(TOPDIR)/plugins/A51/cuda.cpp >> .depend
	$(GXX) $(INC) -M -MT sse_bitslice_plugin.o $(TOPDIR)/plugins/A51/sse_bitslice.cpp >> .depend
	$(GXX_PPE) $(INC) -M -MT spe_bitslice_plugin.o $(TOPDIR)/plugins/A51/spe_bitslice.cpp >> .depend
	$(GXX) $(INC) -M -MT sse_bitslice_kernel.o $(TOPDIR)/plugins/A51/sse_bitslice_kernel.cpp >> .depend
	for k in cuda_bitslice_kernel cuda_localmem_kernel; do \
		$(GXX) -x c++ $(INC) -M -MT $$k.o $(TOPDIR)/plugins/A51/$$k.cu >> .depend || exit 1; \
	done
	spu-g++ $(INC) -M $(TOPDIR)/calculate_chain_spe.cpp >> .depend
| 162 | |
|---|
# Object groups used by the link rules below. The order inside each rule's
# prerequisite list is the order the objects appear on the link line.
core_objs          := calculate_chain.o main_plugin.o
work_objs          := work_generators_plugin.o work_consumers_plugin.o work_intermediates_plugin.o
sse_objs           := sse_bitslice_plugin.o sse_bitslice_kernel.o
cuda_bitslice_objs := cuda_device_plugin.o cuda_bitslice_plugin.o cuda_bitslice_kernel.o
cuda_localmem_objs := cuda_localmem_plugin.o cuda_localmem_kernel.o
ppe_objs           := calculate_chain_ppe.o main_plugin_ppe.o calculate_chain_spe_embed.o spe_bitslice_plugin.o

# PPE/SPE build; additionally links libspe2.
c_ppe: $(ppe_objs)
	$(GXX_PPE) $(HOST_CFLAGS) $(CFLAGS) -o $@ $^ $(LIBS) -lgcc_s -lstxxl -lspe2

# SSE-only build.
c_sse: $(core_objs) $(work_objs) $(sse_objs)
	$(GXX) $(HOST_CFLAGS) $(CFLAGS) -o $@ $^ $(LIBS) -lgcc_s -lstxxl

# CUDA-only build (bitslice and local-memory kernels); links the CUDA runtime.
c_cuda: $(core_objs) $(cuda_bitslice_objs) $(work_objs) $(cuda_localmem_objs)
	$(GXX) $(HOST_CFLAGS) $(CFLAGS) -o $@ $^ $(LIBS) -lcudart -lgcc_s -lstxxl

# Bare build: core objects only.
c_raw: $(core_objs)
	$(GXX) $(HOST_CFLAGS) $(CFLAGS) -o $@ $^ $(LIBS) -lgcc_s -lstxxl

# Default binary: CUDA bitslice plus SSE (no local-memory CUDA objects).
c: $(core_objs) $(cuda_bitslice_objs) $(sse_objs) $(work_objs)
	$(GXX) $(HOST_CFLAGS) $(CFLAGS) -o $@ $^ $(LIBS) -lcudart -lgcc_s -lstxxl
| 177 | |
|---|
# The targets below are unused by the regular build or are debugging aids.

# Print the preprocessed calculate_chain.cpp on stdout; no file named after
# the target is written.
calculate_chain.E: $(TOPDIR)/calculate_chain.cpp
	$(GXX) $(INC) -E $<
| 182 | |
|---|
# List every regular file under $(TOPDIR), skipping editor backups (*~),
# Subversion metadata and the doc/, fpga/ and obj/ subtrees.
source_files_list:
	@find $(TOPDIR)/ -type f \
		! -name '*~' \
		! -path '$(TOPDIR)/doc/*' \
		! -path '*/.svn/*' \
		! -path '$(TOPDIR)/fpga/*' \
		! -path '$(TOPDIR)/obj/*'
| 185 | |
|---|
# Regression tests test .. test6: run ./c with a fixed command line, capture
# its stdout in testdataN and compare that with the reference copy in
# $(TOPDIR)/data.
#
# $(call check_testdata,NAME) prints "+++ Test  OK +++" or
# "+++ Test  FAILED +++" around any diff output, exactly as before, but now
# exits non-zero on a mismatch so that make (and any script driving it)
# notices the failure. printf replaces the non-portable "echo -n".
check_testdata = printf '+++ Test  '; \
    if $(DIFF) $(1) $(TOPDIR)/data/$(1); then echo 'OK +++'; \
    else echo 'FAILED +++'; exit 1; fi

test:
	./c --nodefault --operations 512 --algorithm A51 --condition rounds:rounds=32 --implementation sharedmem --device cuda --roundfunc xor:condition=distinguished_point::bits=15:generator=lfsr::tablesize=32::advance=0::force --work increment --consume print --logger verbose generate --chains 1024 --chainlength 3072 > testdata
	@$(call check_testdata,testdata)

test2:
	./c --nodefault --algorithm A51 --condition rounds:rounds=32 --implementation sharedmem --device cuda --roundfunc xor:condition=distinguished_point::bits=15:generator=lfsr::tablesize=32::advance=0::force --work increment --consume print --logger verbose generate --chains 1024 --chainlength 3072000 > testdata2
	@$(call check_testdata,testdata2)

test3:
	./c --nodefault --algorithm A51 --condition rounds:rounds=32 --implementation sharedmem --device cuda --roundfunc xor:condition=distinguished_point::bits=15:generator=lfsr::tablesize=32::advance=0::force --work increment --consume print --logger normal generate --chains 1024 --chainlength 3072000 --intermediate filter:runlength=512 > testdata3
	@$(call check_testdata,testdata3)

test4:
	./c --nodefault --operations 512 --algorithm A51 --condition rounds:rounds=32 --implementation sharedmem --device cuda --roundfunc xor:condition=distinguished_point::bits=15:generator=lfsr2::tablesize=32::advance=0 --work increment --consume print --logger verbose generate --chains 16 --chainlength 3072 > testdata4
	@$(call check_testdata,testdata4)

test5:
	./c --nodefault --algorithm A51 --condition rounds:rounds=32 --implementation sharedmem --device cuda --roundfunc xor:condition=distinguished_point::bits=15:generator=lfsr2::tablesize=32::advance=0 --work increment --consume print --logger verbose generate --chains 16 --chainlength 3072000 > testdata5
	@$(call check_testdata,testdata5)

test6:
	./c --nodefault --algorithm A51 --condition rounds:rounds=32 --implementation sharedmem --device cuda --roundfunc xor:condition=distinguished_point::bits=15:generator=lfsr2::tablesize=32::advance=0 --work increment --consume print --logger normal generate --chains 16 --chainlength 3072000 --intermediate filter:runlength=512 > testdata6
	@$(call check_testdata,testdata6)
| 209 | |
|---|
# Cross-checks of ./c_cuda against the refa51 reference program.
#
# refa51_check is the shared tail of both pipelines: from every output line
# containing "results" it takes fields 3 and 7, prints field 7 followed by a
# blank, then runs ./refa51 with those two values and the fixed parameters
# below.
refa51_check = grep results | awk '{ print $$3 " " $$7 }' | while read s e; do echo -n "$$e "; ./refa51 -c $$e -o 1000000 $$s 15 2 2048; done

# Bitslice implementation, 4 blocks of 128 threads.
test23: refa51
	./c_cuda --work sort:source=random::limit=1K:ram=1 \
		--icondition distinguished_point:bits=15 \
		--condition rounds:rounds=2 --advance 2048 \
		--device cuda:implementation=bitslice:blocks=4:threads=128 \
		--consume print:all --operations 512 \
		generate --chainlength 1000000 --chains 8192 \
		--intermediate sort:parts=1:ram=300 2>&1 | $(refa51_check)

# Shared-memory implementation, 4 blocks of 256 threads.
test24: refa51
	./c_cuda --work sort:source=random::limit=1K:ram=1 \
		--icondition distinguished_point:bits=15 \
		--condition rounds:rounds=2 --advance 2048 \
		--device cuda:implementation=sharedmem:blocks=4:threads=256 \
		--consume print:all --operations 512 \
		generate --chainlength 1000000 --chains 8192 \
		--intermediate sort:parts=1:ram=300 2>&1 | $(refa51_check)