Looking through the microscope - performance!
The producer-consumer approach is very popular in network driver design. Transmit and receive descriptors are usually laid out in physical pages that serve as shared memory between host and device, and each descriptor table has a producer index and a consumer index. To give the table a ring structure, these indices are incremented modulo the total size of the memory that the descriptor table occupies, so the indices wrap around. Depending on the device memory available and the size of the descriptor structure, we may be able to use a native type such as uint16_t for an index, so that it wraps by itself and we never need to check for wrap-around after an increment. But 16 bits covers 64 KB, so if the descriptor structure is, say, 64 bytes, we need about 1K descriptors before the index wraps naturally. If the device provides only, say, 16 KB of memory for the table, then one option is to check for wrap-around after each increment of an index. Well, this seems like an extra burden in a high-performance interrupt path. The alternative would be to define a bit field just wide enough (14 bits for 16 KB of byte offsets) that it wraps automatically at the end of the table. That way we avoid the explicit wrap-around check and might expect a performance boost. But if we look through the microscope, we may reach a different conclusion. In the increment-and-compare case there is hardly any branch hazard to worry about: the wrap branch is taken only once per trip around the ring. Think about it! Looking at the code gcc generated for an example case, I was surprised to see that the bit-field approach is indeed slower. In the listing below, the bit-field increment (mybitfield.mytestfield++) compiles to eight instructions - load, mask, add, reload, mask twice, OR, and store - whereas the plain counter with an explicit check (uintvar++ followed by the if) takes four instructions on the common path, plus one extra store only when it actually wraps. Take a look!
GAS LISTING /tmp/ccF3PvQX.s page 1
1 .file "sundry.c"
4 .text
5 Ltext0:
211 .section .rdata,"dr"
212 LC0:
213 0000 6D796269 .ascii "mybitfield.mytestfield is:%d\0"
213 74666965
213 6C642E6D
213 79746573
213 74666965
214 001d 000000 .text
216 .globl _bitfield_0
218 _bitfield_0:
1:sundry.c **** /*
2:sundry.c **** * sundry.c
3:sundry.c **** *
4:sundry.c **** * Created on: Sep 15, 2010
5:sundry.c **** * Author: Prokash Sinha
6:sundry.c **** */
7:sundry.c **** #include <stdio.h>
8:sundry.c ****
9:sundry.c **** void bitfield_0()
10:sundry.c **** {
220 LM1:
221 0000 55 pushl %ebp
222 0001 89E5 movl %esp, %ebp
223 0003 83EC18 subl $24, %esp
11:sundry.c ****
12:sundry.c **** unsigned int uintvar =0;
225 LM2:
226 0006 C745FC00 movl $0, -4(%ebp)
226 000000
13:sundry.c ****
14:sundry.c **** struct {
15:sundry.c **** unsigned int : 0;
16:sundry.c **** unsigned int mytestfield: 4;
17:sundry.c **** unsigned int pad: 28;
18:sundry.c **** }mybitfield;
19:sundry.c ****
20:sundry.c **** mybitfield.mytestfield = 7;
228 LM3:
229 000d 8B45F8 movl -8(%ebp), %eax
230 0010 83E0F0 andl $-16, %eax
231 0013 83C807 orl $7, %eax
232 0016 8945F8 movl %eax, -8(%ebp)
21:sundry.c **** printf("mybitfield.mytestfield is:%d", mybitfield.mytestfield);
234 LM4:
235 0019 0FB645F8 movzbl -8(%ebp), %eax
236 001d 83E00F andl $15, %eax
237 0020 89442404 movl %eax, 4(%esp)
238 0024 C7042400 movl $LC0, (%esp)
238 000000
239 002b E8000000 call _printf
239 00
22:sundry.c ****
23:sundry.c **** /*show that inc beyond the width makes it zero */
24:sundry.c **** mybitfield.mytestfield = 16;
GAS LISTING /tmp/ccF3PvQX.s page 2
241 LM5:
242 0030 8B45F8 movl -8(%ebp), %eax
243 0033 83E0F0 andl $-16, %eax
244 0036 8945F8 movl %eax, -8(%ebp)
25:sundry.c **** printf("mybitfield.mytestfield is:%d", mybitfield.mytestfield);
246 LM6:
247 0039 0FB645F8 movzbl -8(%ebp), %eax
248 003d 83E00F andl $15, %eax
249 0040 89442404 movl %eax, 4(%esp)
250 0044 C7042400 movl $LC0, (%esp)
250 000000
251 004b E8000000 call _printf
251 00
26:sundry.c ****
27:sundry.c **** /*show just the instructions to inc w/o worry for wrapping */
28:sundry.c **** mybitfield.mytestfield++ ;
253 LM7:
254 0050 0FB645F8 movzbl -8(%ebp), %eax
255 0054 83E00F andl $15, %eax
256 0057 8D5001 leal 1(%eax), %edx
257 005a 8B45F8 movl -8(%ebp), %eax
258 005d 83E20F andl $15, %edx
259 0060 83E0F0 andl $-16, %eax
260 0063 09D0 orl %edx, %eax
261 0065 8945F8 movl %eax, -8(%ebp)
29:sundry.c ****
30:sundry.c **** /* show how many instruction it generates */
31:sundry.c **** uintvar++;
263 LM8:
264 0068 8D45FC leal -4(%ebp), %eax
265 006b FF00 incl (%eax)
32:sundry.c **** if ( uintvar > 4000000)
267 LM9:
268 006d 817DFC00 cmpl $4000000, -4(%ebp)
268 093D00
269 0074 7607 jbe L1
33:sundry.c **** uintvar = 0;
271 LM10:
272 0076 C745FC00 movl $0, -4(%ebp)
272 000000
273 L1:
34:sundry.c ****
35:sundry.c **** }
275 LM11:
276 007d C9 leave
277 007e C3 ret
282 Lscope0:
286 .globl _main
288 _main:
36:sundry.c ****
37:sundry.c **** void main (void)
38:sundry.c **** {
290 LM12:
291 007f 55 pushl %ebp
292 0080 89E5 movl %esp, %ebp
293 0082 83EC08 subl $8, %esp
294 0085 83E4F0 andl $-16, %esp
GAS LISTING /tmp/ccF3PvQX.s page 3
295 0088 B8000000 movl $0, %eax
295 00
296 008d 83C00F addl $15, %eax
297 0090 83C00F addl $15, %eax
298 0093 C1E804 shrl $4, %eax
299 0096 C1E004 sall $4, %eax
300 0099 8945FC movl %eax, -4(%ebp)
301 009c 8B45FC movl -4(%ebp), %eax
302 009f E8000000 call __alloca
302 00
304 LM13:
305 00a4 E8000000 call ___main
305 00
39:sundry.c **** bitfield_0();
307 LM14:
308 00a9 E852FFFF call _bitfield_0
308 FF
40:sundry.c **** }
310 LM15:
311 00ae C9 leave
312 00af C3 ret
313 Lscope1:
315 .text
317 Letext:
GAS LISTING /tmp/ccF3PvQX.s page 4
DEFINED SYMBOLS
*ABS*:00000000 sundry.c
/tmp/ccF3PvQX.s:218 .text:00000000 _bitfield_0
/tmp/ccF3PvQX.s:288 .text:0000007f _main
UNDEFINED SYMBOLS
___main
__alloca
_printf
References (1)
-
Response: cv writing servicesPrograming always works for the solution of different problems. People spend some time to use these programing its best path for them for getting best result in no time. People have great advantage to use then for their problem solving. It’s like the air in atmosphere for the people.
Reader Comments