/[dynamips]/trunk/profiler.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/profiler.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 12 - (show annotations)
Sat Oct 6 16:45:40 2007 UTC (12 years ago) by dpavlin
File MIME type: text/plain
File size: 9533 byte(s)
make working copy

1 /*
2 * Contribution of Mtve.
3 */
4
5 #include <fcntl.h>
6 #include <unistd.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <string.h>
10
11 #define THREADED 1
12
13 #if THREADED
14 #include <pthread.h>
15 #endif
16
17 /*
18 * Call to profiling routine .mcount is automatically inserted by gcc -p.
19 *
20 * However, standard .mcount from (g)libc is not working well for me
21 * with optimized (-O3 -fomit-frame-pointer) threaded code,
22 * at least because it doesn't save all registers.
23 *
24 * So here is another square wheel. It works only on IA32 (i386).
25 *
26 * Theory:
27 * .mcount is called like this
28 *
29 * 08048479 <some_func>:
30 * 8048479: 55 push %ebp # can be
31 * 804847a: 89 e5 movl %esp,%ebp # absent
32 * 804847c: 83 ec 1c subl $28,%esp
33 * 804847f: 55 pushl %ebp
34 * 8048480: 57 pushl %edi
35 * 8048481: 56 pushl %esi
36 * 8048482: 53 pushl %ebx
37 * 8048483: e8 94 fe ff ff call .mcount
38 * 08048488 <some_func_x>
39 *
40 * So in the entrance of .mcount we have in stack
41 *
42 * %esp -> some_func_x (dword), where mcount should return
43 * saved registers (4 dwords in example)
44 * stack frame (28 bytes in example)
45 * some_func_callee (dword)
46 *
47 * We will:
48 * - check if the code of some_func matches this pattern
49 * - find some_func address and depth of stack
50 * - modify stack by replacing some_func_callee address to ours
51 * - collect statistic
52 */
53
/*
 * Number of slots in the per-function statistics hash table (arr[]).
 * Better to be a prime number, since slots are chosen by "addr % FUNCSMAX".
 */
#define FUNCSMAX 32749

/* Maximum nesting depth of the shadow call stack kept for each thread. */
#define CSTACKSIZE 256

/*
 * Number of shadow call stacks (cstack[]).  A non-threaded build needs
 * exactly one; a threaded build hashes the thread id into the table, so
 * the size is again better chosen prime.
 */
#if !THREADED
#define THREADSMAX 1
#else
#define THREADSMAX 37
#endif
65
/*
 * Per-function statistics, kept in an open-addressing hash table that is
 * filled by findaddr().  A slot whose addr is 0 is free.  The array has
 * FUNCSMAX + 1 entries: index FUNCSMAX is what findaddr() returns after
 * core() when the table is full.
 */
static struct {
    int addr;                 /* function start address; 0 = free slot */
    int enters, exits;        /* bumped by stat_enter() / stat_exit() */
    int aways, rets;          /* bumped by stat_away() / stat_ret() */
    long long timetotal,      /* cycles between enter and exit, accumulated */
              timeoutside;    /* cycles between away and ret, accumulated */
} arr[FUNCSMAX + 1];
75
76 static struct {
77 #if THREADED
78 pthread_t tid;
79 #endif
80 int depth;
81 int ret[CSTACKSIZE];
82 int func[CSTACKSIZE];
83 } cstack[THREADSMAX];
84
/*
 * Abort the process by writing to address 0 (dumps core for post-mortem
 * inspection).  The store goes through a volatile lvalue so the optimizer
 * cannot treat the null dereference as unreachable and silently drop it.
 */
#define core() (*(volatile char *)0 = 0)
86
#if __GNUC__ > 2

/* Tells gcc not to insert profiling calls into the profiler's own functions. */
#define NOPROF __attribute__ ((no_instrument_function))

/*
 * Forward declarations of all functions, so that each one carries NOPROF.
 * They are full prototypes: an empty "()" would be an old-style
 * declaration, which newer C standards read as "(void)" and which then
 * conflicts with the definitions that take arguments.
 */
static inline long long curtime(void) NOPROF;
static inline int findaddr(int addr) NOPROF;
static inline int findthread(void) NOPROF;
static void stat_enter(int slot) NOPROF;
static void stat_exit(int slot) NOPROF;
static void stat_away(int slot) NOPROF;
static void stat_ret(int slot) NOPROF;
void profiler__asm_enter_stub(void) NOPROF;
void profiler__asm_exit_stub(void) NOPROF;
void profiler__c_enter(int *sp_1) NOPROF;
void profiler__c_exit(int *sp) NOPROF;
void profiler_savestat(void) NOPROF;

#else
#warning be sure to compile profiler.c WITHOUT -p flag
#endif
108
/*
 * Read the CPU time-stamp counter.
 *
 * RDTSC leaves the low 32 bits in %eax and the high 32 bits in %edx.  The
 * halves are fetched through separate "=a"/"=d" outputs instead of the
 * ia32-only "=A" register pair, and __asm__ is used instead of asm so the
 * code also builds in strict ISO modes.
 *
 * Returns the 64-bit cycle count.
 */
static inline long long curtime(void)
{
    unsigned int lo, hi;

    __asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));
    return (long long)(((unsigned long long)hi << 32) | lo);
}
116
117 static inline int findaddr(int addr)
118 {
119 int i,j;
120
121 i = j = addr % FUNCSMAX;
122 do {
123 if (arr[i].addr == addr) {
124 return i;
125 } else if (arr[i].addr == 0) {
126 arr[i].addr = addr;
127 return i;
128 }
129 i = (i+1) % FUNCSMAX;
130 } while (i != j);
131 core(); /* increase FUNCSMAX */
132 return(FUNCSMAX);
133 }
134
/*
 * Find the cstack[] slot belonging to the calling thread, claiming a free
 * one if the thread has none yet.  Non-threaded builds always use slot 0.
 *
 * Two details matter here:
 *  - the home slot is computed from the unsigned thread id, so that ids
 *    which are negative when viewed as int cannot yield a negative index;
 *  - the whole probe sequence is searched for the thread before a free
 *    slot is claimed.  profiler__c_exit() frees slots by zeroing tid, which
 *    can open a hole in front of a slot that is still in use; stopping at
 *    the first hole would give that thread a second, empty call stack.
 *
 * Returns the slot index.  If the table is full, core() is invoked and 0
 * is returned.
 */
static inline int findthread(void)
{
#if THREADED
    const pthread_t self = pthread_self();
    const int home = (int)((unsigned long)self % THREADSMAX);
    int i = home;
    int vacant = -1;    /* first free slot met along the probe sequence */

    do {
        if (cstack[i].tid == self)
            return i;
        if (vacant < 0 && cstack[i].tid == 0)
            vacant = i;
        i = (i + 1) % THREADSMAX;
    } while (i != home);

    if (vacant >= 0) {
        cstack[vacant].tid = self;
        return vacant;
    }
    core(); /* increase THREADSMAX */
#endif
    return 0;
}
155
156 static void stat_enter(int slot)
157 {
158 arr[slot].enters++;
159 arr[slot].timetotal -= curtime();
160 }
161
162 static void stat_exit(int slot)
163 {
164 arr[slot].exits++;
165 arr[slot].timetotal += curtime();
166 }
167
168 static void stat_away(int slot)
169 {
170 arr[slot].aways++;
171 arr[slot].timeoutside -= curtime();
172 }
173
174 static void stat_ret(int slot)
175 {
176 arr[slot].rets++;
177 arr[slot].timeoutside += curtime();
178 }
179
/*
 * Entry points that exist only as assembler labels inside the stub
 * functions; declared here so that C code can take their addresses.
 */
extern void profiler__asm_enter(void);
extern void profiler__asm_exit(void);

/* Shorthand for the gcc __asm__ keyword. */
#define A __asm__
184
185 /*
186 * that's really weird but compatible with both gcc2 and gcc3
187 *
188 * things i don't want to care of
189 * - what size on stack pusha/popa use
190 * - what current function framing is
191 */
/*
 * Container for the .mcount / profiler__asm_enter entry point.
 *
 * This C function is never meant to be called itself; it only gives the
 * assembly below a place to live.  Instrumented code jumps straight to the
 * .mcount label.  The whole sequence is ONE asm statement: the compiler
 * does not promise to keep separate asm statements adjacent, and these
 * instructions must stay contiguous.
 *
 * On entry to the label, (%esp) is the return address inside the
 * instrumented function.  The stub saves %eax, passes the address of that
 * saved %eax to profiler__c_enter() (so [1] is the return address), and
 * restores every register before returning.
 */
void profiler__asm_enter_stub(void)
{
    A(
        "   .globl .mcount              \n"
        "   .globl profiler__asm_enter  \n"
        "profiler__asm_enter:           \n"
        ".mcount:                       \n"
        "   pushl %eax                  \n" /* save %eax */
        "   movl %esp,%eax              \n" /* %eax = old %esp - 4 */
        "   pusha                       \n" /* save all registers */
        "   push %eax                   \n" /* push parameter to stack */
        "   call profiler__c_enter      \n" /* call C routine */
        "   pop %eax                    \n" /* clear parameter from stack */
        "   popa                        \n" /* restore all registers */
        "   pop %eax                    \n" /* restore %eax */
        "   ret                         \n" /* return */
    );
}
208
/*
 * Container for the profiler__asm_exit entry point.
 *
 * profiler__c_enter() replaces an instrumented function's return address
 * with profiler__asm_exit, so control arrives at the label by "ret", with
 * no return address on the stack.  The stub pushes a placeholder, lets
 * profiler__c_exit() overwrite it with the real return address (it
 * receives the address of the saved %eax, so [1] is the placeholder), and
 * then returns through it with all registers restored.
 *
 * The sequence is ONE asm statement because the compiler does not promise
 * to keep separate asm statements adjacent.
 */
void profiler__asm_exit_stub(void)
{
    A(
        "profiler__asm_exit:            \n"
        "   pushl $0xdeadbeaf           \n" /* placeholder for return address */
        "   pushl %eax                  \n" /* save %eax */
        "   movl %esp,%eax              \n" /* %eax = addr of placeholder - 4 */
        "   pusha                       \n" /* save all registers */
        "   pushl %eax                  \n" /* push parameter to stack */
        "   call profiler__c_exit       \n" /* call C routine */
        "   popl %eax                   \n" /* clear parameter from stack */
        "   popa                        \n" /* restore all registers */
        "   popl %eax                   \n" /* restore %eax */
        "   ret                         \n" /* return via patched placeholder */
    );
}
223
224 void profiler__c_enter(int *sp_1)
225 {
226 unsigned char *pc;
227 int stdepth = 2, i, thr, slot, gcc2 = 1;
228
229 if (sizeof(int) != 4)
230 core(); /* sizeof int != 4 */
231 if (sizeof(long long) != 8)
232 core(); /* sizeof long long != 8 */
233 if (sizeof(void *) != 4)
234 core(); /* sizeof pointer != 4 */
235
236 pc = (char *)(sp_1[1]);
237
238 pc -= 5;
239 if (*pc != 0xe8) /* call <relative> */
240 core(); /* called not by 0xe8 */
241 if ((int)pc + 5 + *(int *)(pc+1) != (int)profiler__asm_enter)
242 core(); /* call points not to .mcount */
243
244 if (pc[-1] == 0x53) /* push %ebx */
245 pc--, stdepth++;
246 if (pc[-1] == 0x56) /* push %esi */
247 pc--, stdepth++;
248 if (pc[-1] == 0x57) /* push %edi */
249 pc--, stdepth++;
250 if (pc[-1] == 0x55) /* push %ebp */
251 pc--, stdepth++;
252
253 if (pc[-6]==0x81 && pc[-5]==0xec && pc[-2]==0 && pc[-1]==0) {
254 /* sub <dword>,%esp */
255 stdepth += *(int *)(pc - 4)/4;
256 pc -= 6;
257 } else if (pc[-3]==0x83 && pc[-2]==0xec && pc[-1]%4==0) {
258 /* sub <byte>,%esp */
259 stdepth += pc[-1]/4;
260 pc -= 3;
261 } else
262 gcc2 = 0;
263
264 while (pc[-1] >= 0x50 && pc[-1] <= 0x57) /* push %e[reg] */
265 pc--, stdepth++;
266
267 /* "pushl %ebp; movl %esp,%ebp;" */
268 if (pc[-3]==0x55 && pc[-2]==0x89 && pc[-1]==0xe5) {
269 stdepth++;
270 pc -= 3;
271 } else if(!gcc2)
272 core(); /* unknown prologue, examine x/10i pc-10 */
273
274 /*
275 * Now we know that it's standard prologue, so we modify the stack
276 */
277 thr = findthread();
278 slot = findaddr((int)pc);
279
280 i = cstack[thr].depth++;
281 if(i >= CSTACKSIZE)
282 core(); /* call stack overflow */
283
284 cstack[thr].func[i] = slot;
285 cstack[thr].ret[i] = sp_1[stdepth];
286 sp_1[stdepth] = (int)profiler__asm_exit;
287
288 if (i > 0)
289 stat_away(cstack[thr].func[i - 1]);
290 stat_enter(slot);
291 }
292
293 void profiler__c_exit(int *sp)
294 {
295 int i, thr;
296
297 thr = findthread();
298 i = --cstack[thr].depth;
299 if (i < 0)
300 core(); /* call stack underflow */
301 sp[1] = cstack[thr].ret[i];
302
303 stat_exit(cstack[thr].func[i]);
304 if (i > 0)
305 stat_ret(cstack[thr].func[i - 1]);
306 #if THREADED
307 else
308 cstack[thr].tid = 0; /* free this stack */
309 #endif
310 }
311
312 #ifndef PROFILE_FILE
313 #error define PROFILE_FILE where to save statistic
314 #endif
315
/*
 * Write the whole NUL-terminated string str to descriptor fd.
 *
 * Short writes are continued until everything is out, and a write
 * interrupted by a signal (EINTR) is retried.  Any other error, or a
 * write that makes no progress, silently ends the attempt: output is best
 * effort and the function reports nothing to its caller.
 */
static void mywrite(int fd, const char *str)
{
    size_t left = strlen(str);

    while (left > 0) {
        ssize_t n = write(fd, str, left);

        if (n < 0) {
            if (errno == EINTR)
                continue;       /* interrupted before writing: try again */
            return;
        }
        if (n == 0)
            return;             /* no progress; do not spin forever */
        str += n;
        left -= (size_t)n;
    }
}
324
325 void profiler_savestat(void)
326 {
327 int i, fd;
328 char buf[1024];
329
330 fd = open(PROFILE_FILE,O_CREAT | O_TRUNC | O_WRONLY,0666);
331 if (fd < 0) {
332 mywrite(2,"open " PROFILE_FILE " failed - ");
333 mywrite(2,strerror(errno));
334 mywrite(2,"\n");
335 return;
336 }
337
338 snprintf(buf,sizeof(buf),"\nProfiling statistic %s at time %lld:\n"
339 "\n%8s %10s %10s %10s %10s %20s %20s\n",PROFILE_FILE,curtime(),
340 "Function","Enters","Exits","Aways","Returns",
341 "Cycles_Total","Cycles_Inside");
342 mywrite(fd,buf);
343
344 for (i = 0; i < FUNCSMAX; i++)
345 if (arr[i].addr) {
346 snprintf(buf,sizeof(buf),"%08x %10d %10d %10d %10d %20lld %20lld\n",
347 arr[i].addr,arr[i].enters,arr[i].exits,
348 arr[i].aways,arr[i].rets,
349 arr[i].timetotal + (arr[i].enters-arr[i].exits) * curtime(),
350 arr[i].timetotal - arr[i].timeoutside + curtime() *
351 (arr[i].enters-arr[i].exits-arr[i].aways+arr[i].rets));
352 mywrite(fd,buf);
353 }
354
355 close(fd);
356 }

  ViewVC Help
Powered by ViewVC 1.1.26