root/Makefile

Revision 89, 11.3 KB (checked in by sascha, 2 years ago)

Fixes for 32-bit systems

Line 
# ---- Site configuration: adapt these to your installation ----

# Top of the source tree; only assigned here when not already defined.
TOPDIR ?= ..

# Local settings file. The *_DIR variables used further down are not assigned
# in this file -- presumably they are provided here (TODO confirm).
include $(TOPDIR)/Makefile.local

# Linker switch placed in front of the less common libraries (boost png z)
# so that they are linked statically.
STATIC = -Wl,-Bstatic

# Host C++ compiler; only assigned when not already defined. The
# compressed-pair define mentioned in the trailing comment is meant to be
# used with g++-4.1 only.
GXX ?= g++-4.1 # -DTMTO_USE_BOOST_COMPRESSED_PAIR

# Compiler command used for the *_ppe objects.
GXX_PPE = ppu-g++ -m64

# ---- Nothing below this line should need editing ----

# Disabled SDK search paths, kept for reference.
#SDK_LIBS=-L$(SDK_DIR)/lib
#SDK_INC=-I$(SDK_DIR)/common/inc

# Header and library search paths derived from the *_DIR settings.
CUDA_INC    = -I$(CUDA_DIR)/include
STXXL_INC   = -I$(STXXL_DIR)/include
LIBSPE2_INC = -I$(LIBSPE2_DIR) -I$(LIBSPE2_DIR)/spebase
STXXL_LIBS  = -L$(STXXL_DIR)/lib

# nvcc command line shared by all CUDA rules: passes -keep, verbose ptxas
# output with a register cap of 128, plus any extra $(NVCC_CFLAGS).
NVCC = $(CUDA_DIR)/bin/nvcc -keep --ptxas-options="-v --maxrregcount=128" $(NVCC_CFLAGS)

# Comparison command used by the test targets.
DIFF = diff --strip-trailing-cr
26
# Boost search paths. Both stay empty when BOOST_DIR is unset or empty;
# otherwise headers are taken from $(BOOST_DIR) and libraries from its
# stage/lib subdirectory.
ifeq ($(BOOST_DIR),)
BOOST_INC =
BOOST_LIBDIR =
else
BOOST_INC = -I$(BOOST_DIR)
BOOST_LIBDIR = -L$(BOOST_DIR)/stage/lib
endif
38
# Default goal: build the combined binary "c".
all: c

# Read the dependencies written by "make depend"; a missing file is not an error.
-include .depend

# Include path shared by every compile step.
INC = -I$(TOPDIR) $(CUDA_INC) $(STXXL_INC) $(BOOST_INC) $(LIBSPE2_INC)

# Common compile flags: optional $(M32) plus the STXXL/Boost feature defines.
CFLAGS = $(M32) \
         -DSTXXL_BOOST_THREADS \
         -DSTXXL_BOOST_CONFIG \
         -DSTXXL_BOOST_FILESYSTEM \
         -DSTXXL_BOOST_RANDOM \
         -DSTXXL_BOOST_TIMESTAMP

# Link line: library search paths first, then everything between $(STATIC)
# and -Wl,-Bdynamic, then the libraries listed after -Wl,-Bdynamic.
LIBS = $(STXXL_LIBS) -L$(LIBSPE2_DIR) \
       -L$(CUDA_DIR)/lib -L$(CUDA_DIR)/lib64 -L$(CUDA_DIR)/lib \
       $(BOOST_LIBDIR) \
       $(STATIC) \
       -lboost_program_options$(BOOST_INFIX)-mt \
       -lstxxl \
       -lboost_regex$(BOOST_INFIX)-mt \
       -lboost_thread$(BOOST_INFIX)-mt \
       -lboost_system$(BOOST_INFIX)-mt \
       -lboost_filesystem$(BOOST_INFIX)-mt \
       -lboost_iostreams$(BOOST_INFIX)-mt \
       -Wl,-Bdynamic -lgcc_s -lpthread
47
# Targets that name commands rather than files they create. Declaring them
# phony keeps a stray file of the same name from silently disabling them.
# The *.E targets print preprocessor output and never create a file called $@.
.PHONY: all clean depend nvcc_gcc_script source_files_list \
        calculate_chain_cuda.E calculate_chain_cuda_bitslice.E \
        cuda_bitslice_kernel.E calculate_chain.E \
        test test2 test3 test4 test5 test6 test23 test24

# Remove build products: run nvcc in -clean mode for each CUDA source, then
# delete the objects and binaries. rm -f is used so that cleaning a tree in
# which some of these files were never built is not an error.
clean:
	PATH=.:$$PATH $(NVCC) -clean -keep -c $(INC) -o calculate_chain_cuda.o $(TOPDIR)/calculate_chain_cuda.cu
	PATH=.:$$PATH $(NVCC) -clean -keep -c $(INC) -o cuda_localmem_kernel.o $(TOPDIR)/plugins/A51/cuda_localmem_kernel.cu
	PATH=.:$$PATH $(NVCC) -clean -keep -c $(INC) -o cuda_bitslice_kernel.o $(TOPDIR)/plugins/A51/cuda_bitslice_kernel.cu
	rm -f c *.o refa51 c_cuda c_sse
55
# Placeholder rule: the recipe only runs a bare "echo" and does not create
# the file named by the target.
calculate_chain_methods.cu: ; echo
58
# Write an executable ./gcc wrapper script that execs $(GXX) with all of its
# arguments. The nvcc rules run with "." first on PATH.
# The wrapper is the OUTPUT of this recipe, so it must not be listed as a
# prerequisite: there is no other rule for "gcc", and requiring it made this
# target impossible to run on a tree where the wrapper did not exist yet.
nvcc_gcc_script:
	(echo "#!/bin/sh"; echo "exec $(GXX) \"\$$@\"") > gcc
	chmod 755 gcc
62
# Compile the CUDA chain calculation with nvcc, with "." first on PATH.
# The ./gcc wrapper is (re)written first, exactly as the other nvcc compile
# rules in this file do; without that step this rule depended on some other
# rule having produced the wrapper earlier.
calculate_chain_cuda.o: $(TOPDIR)/calculate_chain_cuda.cu
	(echo "#!/bin/sh"; echo "exec $(GXX) \"\$$@\"") > gcc
	chmod 755 gcc
	PATH=.:$$PATH $(NVCC) -c -keep $(CFLAGS) $(INC) -o $@ $<
65
# Include path of the NVIDIA CUDA SDK common headers. The default is the
# location that used to be hard-coded in the recipe below; override it in
# Makefile.local, the environment or on the command line for other machines.
SDK_INC ?= -I/home/sascha/NVIDIA_CUDA_SDK/common/inc

# Build the stand-alone bitsliced CUDA program (compiled with -D_DEBUG).
# The ./gcc wrapper for $(GXX) is rewritten first; nvcc then runs with "."
# first on PATH.
calculate_chain_cuda_bitsliced: $(TOPDIR)/calculate_chain_cuda_bitsliced.cu
	(echo "#!/bin/sh"; echo "exec $(GXX) \"\$$@\"") > gcc
	chmod 755 gcc
	PATH=.:$$PATH $(NVCC) -D_DEBUG $(SDK_INC) -keep $(CFLAGS) $(INC) -o $@ $<
70
# Run only the preprocessing step (-E) of nvcc on the bitsliced CUDA source.
# No file named after the target is written by this recipe.
calculate_chain_cuda_bitslice.E: $(TOPDIR)/calculate_chain_cuda_bitsliced.cu
	{ echo "#!/bin/sh"; echo "exec $(GXX) \"\$$@\""; } > gcc && chmod 755 gcc
	PATH=.:$$PATH $(NVCC) -E -keep $(CFLAGS) $(INC) $<
75
# calculate_chain.cpp is compiled twice from the same source: once with the
# host compiler and once with the PPE compiler.

# Host build.
calculate_chain.o: $(TOPDIR)/calculate_chain.cpp
	$(GXX) $(HOST_CFLAGS) $(CFLAGS) $(INC) -c $< -o $@

# PPE build.
calculate_chain_ppe.o: $(TOPDIR)/calculate_chain.cpp
	$(GXX_PPE) $(HOST_CFLAGS) $(CFLAGS) $(INC) -c $< -o $@
81
# SPE program, built in three steps: compile with spu-g++, link to an ELF
# image, then wrap that image into an object file with ppu-embedspu under
# the symbol name "spe_program".

# Compile step. -save-temps is passed to spu-g++; the -c switch is given
# once (it used to appear twice on this line).
calculate_chain_spe.o: $(TOPDIR)/calculate_chain_spe.cpp
	spu-g++ $(HOST_CFLAGS) $(CFLAGS) -save-temps $(INC) -c -o $@ $<

# Link step.
calculate_chain_spe.elf: calculate_chain_spe.o
	spu-g++ $(HOST_CFLAGS) $(CFLAGS) -o $@ $<

# Embed step: arguments are symbol name, input ELF, output object.
calculate_chain_spe_embed.o: calculate_chain_spe.elf
	ppu-embedspu spe_program $< $@
90
# Reference A5/1 program built from reference/a51.cpp with -O3; used by the
# test23/test24 targets.
refa51: $(TOPDIR)/reference/a51.cpp
	$(GXX) -O3 $^ -o $@

# readahead helper tool; built with plain g++ and linked against pthread.
readahead: $(TOPDIR)/tools/readahead.cpp
	g++ $^ -o $@ -lpthread
96
# Host-compiled plugin objects that follow one naming scheme:
# <name>_plugin.o is built from plugins/A51/<name>.cpp with $(GXX).
host_plugin_objs = main_plugin.o \
                   work_generators_plugin.o \
                   work_consumers_plugin.o \
                   work_intermediates_plugin.o \
                   sse_bitslice_plugin.o \
                   cuda_localmem_plugin.o \
                   cuda_bitslice_plugin.o

$(host_plugin_objs): %_plugin.o: $(TOPDIR)/plugins/A51/%.cpp
	$(GXX) $(HOST_CFLAGS) $(CFLAGS) $(INC) -c -o $@ $<

# PPE build of the plugin entry point (same source as main_plugin.o).
main_plugin_ppe.o: $(TOPDIR)/plugins/A51/main.cpp
	$(GXX_PPE) $(HOST_CFLAGS) $(CFLAGS) $(INC) -c -o $@ $<

# SPE bitslice plugin glue; compiled with the PPE compiler.
spe_bitslice_plugin.o: $(TOPDIR)/plugins/A51/spe_bitslice.cpp
	$(GXX_PPE) $(HOST_CFLAGS) $(CFLAGS) $(INC) -c -o $@ $<

# SSE kernel; gets $(SSE_KERNEL_CFLAGS) in front of the usual flags.
sse_bitslice_kernel.o: $(TOPDIR)/plugins/A51/sse_bitslice_kernel.cpp
	$(GXX) $(SSE_KERNEL_CFLAGS) $(HOST_CFLAGS) $(CFLAGS) $(INC) -c -o $@ $<
126
# CUDA bitslice kernel: object file (.o) and preprocessed output (.E).
#
# Both recipes rewrite the ./gcc wrapper for $(GXX), run nvcc with "." first
# on PATH, and hide the "Cannot tell what pointer points to" warnings.
#
# nvcc's combined stdout/stderr is captured in $@.log and filtered afterwards
# instead of being piped straight into grep. With a pipe the recipe's result
# was grep's exit status: a failing nvcc could be reported as success, and a
# successful nvcc was reported as failure whenever grep had nothing to print.
# Now the recipe exits with nvcc's own status and grep's status is ignored.

cuda_bitslice_kernel.o: $(TOPDIR)/plugins/A51/cuda_bitslice_kernel.cu
	(echo "#!/bin/sh"; echo "exec $(GXX) \"\$$@\"") > gcc
	chmod 755 gcc
	PATH=.:$$PATH $(NVCC) $(INC) $(CFLAGS) -c -o $@ $< > $@.log 2>&1; status=$$?; \
	grep -v 'Warning: Cannot tell what pointer points to' $@.log; \
	rm -f $@.log; \
	exit $$status

cuda_bitslice_kernel.E: $(TOPDIR)/plugins/A51/cuda_bitslice_kernel.cu
	(echo "#!/bin/sh"; echo "exec $(GXX) \"\$$@\"") > gcc
	chmod 755 gcc
	PATH=.:$$PATH $(NVCC) $(INC) $(CFLAGS) -E $< > $@.log 2>&1; status=$$?; \
	grep -v 'Warning: Cannot tell what pointer points to' $@.log; \
	rm -f $@.log; \
	exit $$status
136
# CUDA local-memory kernel: rewrite the ./gcc wrapper for $(GXX), then
# compile with nvcc while "." is first on PATH.
cuda_localmem_kernel.o: $(TOPDIR)/plugins/A51/cuda_localmem_kernel.cu
	{ echo "#!/bin/sh"; echo "exec $(GXX) \"\$$@\""; } > gcc && chmod 755 gcc
	PATH=.:$$PATH $(NVCC) $(INC) $(CFLAGS) -c -o $@ $<

# CUDA device plugin; note the source file is cuda.cpp, not cuda_device.cpp.
cuda_device_plugin.o: $(TOPDIR)/plugins/A51/cuda.cpp
	$(GXX) $(HOST_CFLAGS) $(CFLAGS) $(INC) -c $< -o $@
144
# Regenerate .depend (read near the top of this file) by running each
# compiler in -M mode. The first command truncates the file, every later one
# appends. -MT names the object each dependency list belongs to.
# The .cu kernels are scanned as C++ (-x c++) with the host compiler.
# The final spu-g++ call passes no -MT and so uses the compiler's default
# target name.
# NOTE(review): spe_bitslice_plugin.o is scanned with $(GXX) here although
# its compile rule uses $(GXX_PPE) -- confirm whether that is intended.
depend:
	$(GXX) $(INC) -M -MT calculate_chain.o $(TOPDIR)/calculate_chain.cpp > .depend
	$(GXX_PPE) $(INC) -M -MT calculate_chain_ppe.o $(TOPDIR)/calculate_chain.cpp >> .depend
	$(GXX) $(INC) -M -MT main_plugin.o $(TOPDIR)/plugins/A51/main.cpp >> .depend
	$(GXX) $(INC) -M -MT work_generators_plugin.o $(TOPDIR)/plugins/A51/work_generators.cpp >> .depend
	$(GXX) $(INC) -M -MT work_consumers_plugin.o $(TOPDIR)/plugins/A51/work_consumers.cpp >> .depend
	$(GXX) $(INC) -M -MT work_intermediates_plugin.o $(TOPDIR)/plugins/A51/work_intermediates.cpp >> .depend
	$(GXX_PPE) $(INC) -M -MT main_plugin_ppe.o $(TOPDIR)/plugins/A51/main.cpp >> .depend
	$(GXX) $(INC) -M -MT cuda_localmem_plugin.o $(TOPDIR)/plugins/A51/cuda_localmem.cpp >> .depend
	$(GXX) $(INC) -M -MT cuda_bitslice_plugin.o $(TOPDIR)/plugins/A51/cuda_bitslice.cpp >> .depend
	$(GXX) $(INC) -M -MT cuda_device_plugin.o $(TOPDIR)/plugins/A51/cuda.cpp >> .depend
	$(GXX) $(INC) -M -MT sse_bitslice_plugin.o $(TOPDIR)/plugins/A51/sse_bitslice.cpp >> .depend
	$(GXX) $(INC) -M -MT spe_bitslice_plugin.o $(TOPDIR)/plugins/A51/spe_bitslice.cpp >> .depend
	$(GXX) $(INC) -M -MT sse_bitslice_kernel.o $(TOPDIR)/plugins/A51/sse_bitslice_kernel.cpp >> .depend
	# "|| exit 1": a shell for-loop returns the status of its LAST iteration,
	# so without it a failure on the first kernel went unnoticed.
	for k in cuda_bitslice_kernel cuda_localmem_kernel; do \
		$(GXX) -x c++ $(INC) -M -MT $$k.o $(TOPDIR)/plugins/A51/$$k.cu >> .depend || exit 1; \
	done
	spu-g++ $(INC) -M $(TOPDIR)/calculate_chain_spe.cpp >> .depend
162
# Final link steps. Objects are passed to the linker in the order in which
# they are listed as prerequisites ($^), followed by $(LIBS) and the extra
# libraries named on each link line.

# PPE/SPE build.
c_ppe: calculate_chain_ppe.o main_plugin_ppe.o \
       calculate_chain_spe_embed.o spe_bitslice_plugin.o
	$(GXX_PPE) $(HOST_CFLAGS) $(CFLAGS) -o $@ $^ $(LIBS) -lgcc_s -lstxxl -lspe2

# SSE-only build.
c_sse: calculate_chain.o main_plugin.o \
       work_generators_plugin.o work_consumers_plugin.o work_intermediates_plugin.o \
       sse_bitslice_plugin.o sse_bitslice_kernel.o
	$(GXX) $(HOST_CFLAGS) $(CFLAGS) -o $@ $^ $(LIBS) -lgcc_s -lstxxl

# CUDA-only build (bitslice and local-memory kernels); links cudart.
c_cuda: calculate_chain.o main_plugin.o \
        cuda_device_plugin.o cuda_bitslice_plugin.o cuda_bitslice_kernel.o \
        work_generators_plugin.o work_consumers_plugin.o work_intermediates_plugin.o \
        cuda_localmem_plugin.o cuda_localmem_kernel.o
	$(GXX) $(HOST_CFLAGS) $(CFLAGS) -o $@ $^ $(LIBS) -lcudart -lgcc_s -lstxxl

# Core objects only.
c_raw: calculate_chain.o main_plugin.o
	$(GXX) $(HOST_CFLAGS) $(CFLAGS) -o $@ $^ $(LIBS) -lgcc_s -lstxxl

# Default binary: CUDA bitslice plus SSE bitslice plugins; links cudart.
c: calculate_chain.o main_plugin.o \
   cuda_device_plugin.o cuda_bitslice_plugin.o cuda_bitslice_kernel.o \
   sse_bitslice_plugin.o sse_bitslice_kernel.o \
   work_generators_plugin.o work_consumers_plugin.o work_intermediates_plugin.o
	$(GXX) $(HOST_CFLAGS) $(CFLAGS) -o $@ $^ $(LIBS) -lcudart -lgcc_s -lstxxl
177
# ---- Unused / debugging helpers ----

# Print the preprocessed form of calculate_chain.cpp on stdout.
calculate_chain.E: $(TOPDIR)/calculate_chain.cpp
	$(GXX) $(INC) -E $<

# List every regular file under $(TOPDIR), leaving out editor backups (*~),
# .svn metadata and the doc/, fpga/ and obj/ subtrees.
source_files_list:
	@find $(TOPDIR)/ -type f \
		! -name '*~' \
		! -path '$(TOPDIR)/doc/*' \
		! -path '*/.svn/*' \
		! -path '$(TOPDIR)/fpga/*' \
		! -path $(TOPDIR)/obj/\*
185
# Regression tests test .. test6: each runs ./c with a fixed option set,
# captures stdout in testdataN and compares it with the reference copy of
# the same name under $(TOPDIR)/data using $(DIFF).

# $(call check_output,FILE): print "+++ Test  OK +++" when FILE matches
# $(TOPDIR)/data/FILE. Otherwise print the differences followed by
# "FAILED +++" and exit non-zero, so that make (and anything driving it)
# sees the failure; previously the recipe always succeeded.
# printf replaces "echo -n", whose behaviour differs between sh implementations.
check_output = printf '+++ Test  '; if $(DIFF) $(1) $(TOPDIR)/data/$(1); then echo 'OK +++'; else echo 'FAILED +++'; exit 1; fi

test:
	./c --nodefault --operations 512 --algorithm A51 --condition rounds:rounds=32 --implementation sharedmem --device cuda --roundfunc xor:condition=distinguished_point::bits=15:generator=lfsr::tablesize=32::advance=0::force --work increment --consume print --logger verbose generate --chains 1024 --chainlength 3072 > testdata
	@$(call check_output,testdata)

test2:
	./c --nodefault --algorithm A51 --condition rounds:rounds=32 --implementation sharedmem --device cuda --roundfunc xor:condition=distinguished_point::bits=15:generator=lfsr::tablesize=32::advance=0::force --work increment --consume print --logger verbose generate --chains 1024 --chainlength 3072000 > testdata2
	@$(call check_output,testdata2)

test3:
	./c --nodefault --algorithm A51 --condition rounds:rounds=32 --implementation sharedmem --device cuda --roundfunc xor:condition=distinguished_point::bits=15:generator=lfsr::tablesize=32::advance=0::force --work increment --consume print --logger normal generate --chains 1024 --chainlength 3072000 --intermediate filter:runlength=512 > testdata3
	@$(call check_output,testdata3)

test4:
	./c --nodefault --operations 512 --algorithm A51 --condition rounds:rounds=32 --implementation sharedmem --device cuda --roundfunc xor:condition=distinguished_point::bits=15:generator=lfsr2::tablesize=32::advance=0 --work increment --consume print --logger verbose generate --chains 16 --chainlength 3072 > testdata4
	@$(call check_output,testdata4)

test5:
	./c --nodefault --algorithm A51 --condition rounds:rounds=32 --implementation sharedmem --device cuda --roundfunc xor:condition=distinguished_point::bits=15:generator=lfsr2::tablesize=32::advance=0 --work increment --consume print --logger verbose generate --chains 16 --chainlength 3072000 > testdata5
	@$(call check_output,testdata5)

test6:
	./c --nodefault --algorithm A51 --condition rounds:rounds=32 --implementation sharedmem --device cuda --roundfunc xor:condition=distinguished_point::bits=15:generator=lfsr2::tablesize=32::advance=0 --work increment --consume print --logger normal generate --chains 16 --chainlength 3072000 --intermediate filter:runlength=512 > testdata6
	@$(call check_output,testdata6)
209
# test23 / test24: run ./c_cuda, keep the lines containing "results", take
# fields 3 and 7 of each, and hand every pair to ./refa51. The two targets
# differ only in the --device setting (bitslice with 128 threads versus
# sharedmem with 256 threads).

test23: refa51
	./c_cuda --work sort:source=random::limit=1K:ram=1 \
		--icondition distinguished_point:bits=15 --condition rounds:rounds=2 --advance 2048 \
		--device cuda:implementation=bitslice:blocks=4:threads=128 \
		--consume print:all --operations 512 generate --chainlength 1000000 --chains 8192 \
		--intermediate sort:parts=1:ram=300 2>&1 \
	| grep results \
	| awk '{ print $$3 " " $$7 }' \
	| while read s e; do echo -n "$$e "; ./refa51 -c $$e -o 1000000 $$s 15 2 2048; done

test24: refa51
	./c_cuda --work sort:source=random::limit=1K:ram=1 \
		--icondition distinguished_point:bits=15 --condition rounds:rounds=2 --advance 2048 \
		--device cuda:implementation=sharedmem:blocks=4:threads=256 \
		--consume print:all --operations 512 generate --chainlength 1000000 --chains 8192 \
		--intermediate sort:parts=1:ram=300 2>&1 \
	| grep results \
	| awk '{ print $$3 " " $$7 }' \
	| while read s e; do echo -n "$$e "; ./refa51 -c $$e -o 1000000 $$s 15 2 2048; done
Note: See TracBrowser for help on using the browser.