"Fossies" - the Fresh Open Source Software Archive 
Member "atop-2.8.1/photoproc.c" (7 Jan 2023, 28444 Bytes) of package /linux/misc/atop-2.8.1.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "photoproc.c" see the
Fossies "Dox" file reference documentation and the last
Fossies "Diffs" side-by-side code changes report:
2.7.1_vs_2.8.0.
1 /*
2 ** ATOP - System & Process Monitor
3 **
4 ** The program 'atop' offers the possibility to view the activity of
5 ** the system on system-level as well as process-/thread-level.
6 **
7 ** This source-file contains functions to read the process-administration
8 ** of every running process from kernel-space and extract the required
9 ** activity-counters.
10 ** ==========================================================================
11 ** Author: Gerlof Langeveld
12 ** E-mail: gerlof.langeveld@atoptool.nl
13 ** Date: November 1996
14 ** LINUX-port: June 2000
15 ** --------------------------------------------------------------------------
16 ** Copyright (C) 2000-2022 Gerlof Langeveld
17 **
18 ** This program is free software; you can redistribute it and/or modify it
19 ** under the terms of the GNU General Public License as published by the
20 ** Free Software Foundation; either version 2, or (at your option) any
21 ** later version.
22 **
23 ** This program is distributed in the hope that it will be useful, but
24 ** WITHOUT ANY WARRANTY; without even the implied warranty of
25 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
26 ** See the GNU General Public License for more details.
27 **
28 ** You should have received a copy of the GNU General Public License
29 ** along with this program; if not, write to the Free Software
30 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 ** --------------------------------------------------------------------------
32 */
33
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <dirent.h>
37 #include <stdio.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <ctype.h>
41 #include <time.h>
42 #include <stdlib.h>
43
44 #include "atop.h"
45 #include "photoproc.h"
46
/*
** sscanf format for the contents of the file 'stat' behind the
** command name, i.e. starting with the third field (task state);
** the format is split in groups of ten fields and the fields that
** are not needed are skipped with '%*d'
**
** the converted values are stored by procstat() as (in this order):
** state, ppid, minflt, majflt, utime, stime, prio, nice, btime,
** vmem, rmem, curcpu, rtprio, policy and blkdelay
*/
#define	SCANSTAT \
	/* fields  3-12 */ "%c %d %*d %*d %*d %*d %*d %lld %*d %lld "   \
	/* fields 13-22 */ "%*d %lld %lld %*d %*d %d %d %*d %*d %ld "   \
	/* fields 23-32 */ "%lld %lld %*d %*d %*d %*d %*d %*d %*d %*d " \
	/* fields 33-42 */ "%*d %*d %*d %*d %*d %*d %d %d %d %lld"

/*
** ATOP-extension line of /proc/pid/stat
** (this format is not referenced by the functions in this file)
*/
#define	ATOPSTAT \
	"%lld %llu %lld %llu %lld %llu %lld %llu " \
	"%lld %llu %lld %llu %lld %lld"
58
59 static int procstat(struct tstat *, unsigned long long, char);
60 static int procstatus(struct tstat *);
61 static int procio(struct tstat *);
62 static int proccont(struct tstat *);
63 static void proccmd(struct tstat *);
64 static void procsmaps(struct tstat *);
65 static void procwchan(struct tstat *);
66 static count_t procschedstat(struct tstat *);
67 static int proccgroupv2(struct tstat *);
68 static struct cgroupv2vals *
69 alloccgroupv2(char *, int);
70 static struct cgroupv2vals *
71 findhashcgroupv2(char *, int *);
72 void fillcgroupv2(struct cgroupv2vals *, char *, char *, int);
73 int readcgroupv2(char *, char *, char *, int, long []);
74 static void wipecgroupv2(void);
75
76 unsigned long
77 photoproc(struct tstat *tasklist, int maxtask)
78 {
79 static int firstcall = 1;
80 static unsigned long long bootepoch;
81
82 register struct tstat *curtask;
83
84 FILE *fp;
85 DIR *dirp;
86 struct dirent *entp;
87 char origdir[1024], dockstat=0;
88 unsigned long tval=0;
89
90 /*
91 ** one-time initialization stuff
92 */
93 if (firstcall)
94 {
95 /*
96 ** check if this kernel offers io-statistics per task
97 */
98 regainrootprivs();
99
100 if ( (fp = fopen("/proc/1/io", "r")) )
101 {
102 supportflags |= IOSTAT;
103 fclose(fp);
104 }
105
106 /*
107 ** check if this kernel offers cgroups version 2
108 */
109 if ( (fp = fopen("/proc/1/cgroup", "r")) )
110 {
111 char line[128];
112
113 if (fgets(line, sizeof line, fp))
114 {
115 if (memcmp(line, "0::", 3) == 0) // equal?
116 supportflags |= CGROUPV2;
117 }
118
119 fclose(fp);
120 }
121
122 if (! droprootprivs())
123 mcleanstop(42, "failed to drop root privs\n");
124
125 /*
126 ** find epoch time of boot moment
127 */
128 bootepoch = getboot();
129
130 firstcall = 0;
131 }
132
133 /*
134 ** probe if the netatop module and (optionally) the
135 ** netatopd daemon are active
136 */
137 regainrootprivs();
138
139 netatop_probe();
140
141 if (supportflags & CGROUPV2)
142 wipecgroupv2();
143
144 if (! droprootprivs())
145 mcleanstop(42, "failed to drop root privs\n");
146
147 /*
148 ** read all subdirectory-names below the /proc directory
149 */
150 if ( getcwd(origdir, sizeof origdir) == NULL)
151 mcleanstop(53, "failed to save current dir\n");
152
153 if ( chdir("/proc") == -1)
154 mcleanstop(54, "failed to change to /proc\n");
155
156 dirp = opendir(".");
157
158 while ( (entp = readdir(dirp)) && tval < maxtask )
159 {
160 /*
161 ** skip non-numerical names
162 */
163 if (!isdigit(entp->d_name[0]))
164 continue;
165
166 /*
167 ** change to the process' subdirectory
168 */
169 if ( chdir(entp->d_name) != 0 )
170 continue;
171
172 /*
173 ** gather process-level information
174 */
175 curtask = tasklist+tval;
176
177 if ( !procstat(curtask, bootepoch, 1)) /* from /proc/pid/stat */
178 {
179 if ( chdir("..") == -1);
180 continue;
181 }
182
183 if ( !procstatus(curtask) ) /* from /proc/pid/status */
184 {
185 if ( chdir("..") == -1);
186 continue;
187 }
188
189 if ( !procio(curtask) ) /* from /proc/pid/io */
190 {
191 if ( chdir("..") == -1);
192 continue;
193 }
194
195 procschedstat(curtask); /* from /proc/pid/schedstat */
196 proccmd(curtask); /* from /proc/pid/cmdline */
197 dockstat += proccont(curtask); /* from /proc/pid/cpuset */
198
199 /*
200 ** cgroups v2: determine cgroup for process and register most
201 ** relevant limits
202 */
203 if (supportflags & CGROUPV2)
204 proccgroupv2(curtask);
205
206 /*
207 ** reading the smaps file for every process with every sample
208 ** is a really 'expensive' from a CPU consumption point-of-view,
209 ** so gathering this info is optional
210 */
211 if (calcpss)
212 procsmaps(curtask); /* from /proc/pid/smaps */
213
214 /*
215 ** determine thread's wchan, if wanted ('expensive' from
216 ** a CPU consumption point-of-view)
217 */
218 if (getwchan)
219 procwchan(curtask);
220
221 // read network stats from netatop
222 netatop_gettask(curtask->gen.tgid, 'g', curtask);
223
224 tval++; /* increment for process-level info */
225
226 /*
227 ** if needed (when number of threads is larger than 1):
228 ** read and fill new entries with thread-level info
229 */
230 if (curtask->gen.nthr > 1)
231 {
232 DIR *dirtask;
233 struct dirent *tent;
234
235 curtask->gen.nthrrun = 0;
236 curtask->gen.nthrslpi = 0;
237 curtask->gen.nthrslpu = 0;
238
239 /*
240 ** rundelay and blkdelay on process level only
241 ** concerns the delays of the main thread;
242 ** totalize the delays of all threads
243 */
244 curtask->cpu.rundelay = 0;
245 curtask->cpu.blkdelay = 0;
246
247 /*
248 ** open underlying task directory
249 */
250 if ( chdir("task") == 0 )
251 {
252 unsigned long cur_nth = 0;
253
254 dirtask = opendir(".");
255
256 /*
257 ** due to race condition, opendir() might
258 ** have failed (leave task and process-level
259 ** directories)
260 */
261 if( dirtask == NULL )
262 {
263 if(chdir("../..") == -1);
264 continue;
265 }
266
267 while ((tent=readdir(dirtask)) && tval<maxtask)
268 {
269 struct tstat *curthr = tasklist+tval;
270
271 /*
272 ** change to the thread's subdirectory
273 */
274 if ( tent->d_name[0] == '.' ||
275 chdir(tent->d_name) != 0 )
276 continue;
277
278 if ( !procstat(curthr, bootepoch, 0))
279 {
280 if ( chdir("..") == -1);
281 continue;
282 }
283
284 if ( !procstatus(curthr) )
285 {
286 if ( chdir("..") == -1);
287 continue;
288 }
289
290 if ( !procio(curthr) )
291 {
292 if ( chdir("..") == -1);
293 continue;
294 }
295
296 /*
297 ** determine thread's wchan, if wanted
298 ** ('expensive' from a CPU consumption
299 ** point-of-view)
300 */
301 if (getwchan)
302 procwchan(curthr);
303
304 // totalize delays of all threads
305 curtask->cpu.rundelay +=
306 procschedstat(curthr);
307
308 curtask->cpu.blkdelay +=
309 curthr->cpu.blkdelay;
310
311 strcpy(curthr->gen.container,
312 curtask->gen.container);
313
314 switch (curthr->gen.state)
315 {
316 case 'R':
317 curtask->gen.nthrrun += 1;
318 break;
319 case 'S':
320 curtask->gen.nthrslpi += 1;
321 break;
322 case 'I':
323 case 'D':
324 curtask->gen.nthrslpu += 1;
325 break;
326 }
327
328 curthr->gen.nthr = 1;
329
330 // read network stats from netatop
331 netatop_gettask(curthr->gen.pid, 't',
332 curthr);
333
334 // all stats read now
335 tval++; /* increment thread-level */
336 cur_nth++; /* increment # threads */
337
338 if ( chdir("..") == -1); /* thread */
339 }
340
341 closedir(dirtask);
342 if ( chdir("..") == -1); /* leave task */
343
344 // calibrate number of threads
345 curtask->gen.nthr = cur_nth;
346 }
347 }
348
349 if ( chdir("..") == -1); /* leave process-level directry */
350 }
351
352 closedir(dirp);
353
354 if ( chdir(origdir) == -1)
355 mcleanstop(55, "cannot change to %s\n", origdir);
356
357 if (dockstat)
358 supportflags |= DOCKSTAT;
359 else
360 supportflags &= ~DOCKSTAT;
361
362 return tval;
363 }
364
/*
** count number of tasks in the system, i.e.
** the number of processes plus the total number of threads
**
** the number of threads is taken from the file /proc/loadavg (value
** behind the slash in the fourth field) and the number of processes
** is determined by counting the subdirectories of /proc with a name
** that starts with a digit
**
** the program is terminated via mcleanstop() when /proc/loadavg or
** the /proc directory can not be accessed
**
** return value: sum of both numbers
*/
unsigned long
counttasks(void)
{
	unsigned long	nr=0;
	char		linebuf[256];
	FILE		*fp;
	DIR		*dirp;
	struct dirent	*entp;
	char		origdir[1024];

	/*
	** determine total number of threads
	*/
	if ( (fp = fopen("/proc/loadavg", "r")) != NULL)
	{
		if ( fgets(linebuf, sizeof(linebuf), fp) != NULL)
		{
			if ( sscanf(linebuf, "%*f %*f %*f %*d/%lu", &nr) < 1)
				mcleanstop(53, "wrong /proc/loadavg\n");
		}
		else
			mcleanstop(53, "unreadable /proc/loadavg\n");

		fclose(fp);
	}
	else
		mcleanstop(53, "can not open /proc/loadavg\n");

	/*
	** add total number of processes
	*/
	if ( getcwd(origdir, sizeof origdir) == NULL)
		mcleanstop(53, "cannot determine cwd\n");

	if ( chdir("/proc") == -1)
		mcleanstop(53, "cannot change to /proc\n");

	/*
	** readdir() may not be called with a NULL pointer,
	** so a failing opendir() has to be handled explicitly
	*/
	if ( (dirp = opendir(".")) == NULL)
		mcleanstop(53, "cannot open /proc\n");

	while ( (entp = readdir(dirp)) )
	{
		/*
		** count subdirectory names under /proc starting with a digit
		** (cast needed: isdigit() requires an unsigned char value)
		*/
		if (isdigit((unsigned char)entp->d_name[0]))
			nr++;
	}

	closedir(dirp);

	if ( chdir(origdir) == -1)
		mcleanstop(53, "cannot change to %s\n", origdir);

	return nr;
}
425
426 /*
427 ** open file "stat" and obtain required info
428 */
429 static int
430 procstat(struct tstat *curtask, unsigned long long bootepoch, char isproc)
431 {
432 FILE *fp;
433 int nr;
434 char line[4096], *p, *cmdhead, *cmdtail;
435
436 if ( (fp = fopen("stat", "r")) == NULL)
437 return 0;
438
439 if ( (nr = fread(line, 1, sizeof line-1, fp)) == 0)
440 {
441 fclose(fp);
442 return 0;
443 }
444
445 line[nr] = '\0'; // terminate string
446
447 /*
448 ** fetch command name
449 */
450 cmdhead = strchr (line, '(');
451 cmdtail = strrchr(line, ')');
452
453 if (!cmdhead || !cmdtail || cmdtail < cmdhead) // parsing failed?
454 {
455 fclose(fp);
456 return 0;
457 }
458
459 if ( (nr = cmdtail-cmdhead-1) > PNAMLEN)
460 nr = PNAMLEN;
461
462 p = curtask->gen.name;
463
464 memcpy(p, cmdhead+1, nr);
465 *(p+nr) = 0;
466
467 while ( (p = strchr(p, '\n')) != NULL)
468 {
469 *p = '?';
470 p++;
471 }
472
473 /*
474 ** fetch other values
475 */
476 curtask->gen.isproc = isproc;
477 curtask->cpu.rtprio = 0;
478 curtask->cpu.policy = 0;
479 curtask->gen.excode = 0;
480
481 sscanf(line, "%d", &(curtask->gen.pid)); /* fetch pid */
482
483 nr = sscanf(cmdtail+2, SCANSTAT,
484 &(curtask->gen.state), &(curtask->gen.ppid),
485 &(curtask->mem.minflt), &(curtask->mem.majflt),
486 &(curtask->cpu.utime), &(curtask->cpu.stime),
487 &(curtask->cpu.prio), &(curtask->cpu.nice),
488 &(curtask->gen.btime),
489 &(curtask->mem.vmem), &(curtask->mem.rmem),
490 &(curtask->cpu.curcpu), &(curtask->cpu.rtprio),
491 &(curtask->cpu.policy), &(curtask->cpu.blkdelay));
492
493 if (nr < 12) /* parsing failed? */
494 {
495 fclose(fp);
496 return 0;
497 }
498
499 /*
500 ** normalization
501 */
502 curtask->gen.btime = (curtask->gen.btime+bootepoch)/hertz;
503 curtask->cpu.prio += 100; /* was subtracted by kernel */
504 curtask->mem.vmem /= 1024;
505 curtask->mem.rmem *= pagesize/1024;
506
507 fclose(fp);
508
509 switch (curtask->gen.state)
510 {
511 case 'R':
512 curtask->gen.nthrrun = 1;
513 break;
514 case 'S':
515 curtask->gen.nthrslpi = 1;
516 break;
517 case 'I':
518 case 'D':
519 curtask->gen.nthrslpu = 1;
520 break;
521 }
522
523 return 1;
524 }
525
526 /*
527 ** open file "status" and obtain required info
528 */
529 static int
530 procstatus(struct tstat *curtask)
531 {
532 FILE *fp;
533 char line[4096];
534
535 if ( (fp = fopen("status", "r")) == NULL)
536 return 0;
537
538 curtask->gen.nthr = 1; /* for compat with 2.4 */
539 curtask->cpu.sleepavg = 0; /* for compat with 2.4 */
540 curtask->mem.vgrow = 0; /* calculated later */
541 curtask->mem.rgrow = 0; /* calculated later */
542
543 while (fgets(line, sizeof line, fp))
544 {
545 if (memcmp(line, "Tgid:", 5) ==0)
546 {
547 sscanf(line, "Tgid: %d", &(curtask->gen.tgid));
548 continue;
549 }
550
551 if (memcmp(line, "Pid:", 4) ==0)
552 {
553 sscanf(line, "Pid: %d", &(curtask->gen.pid));
554 continue;
555 }
556
557 if (memcmp(line, "SleepAVG:", 9)==0)
558 {
559 sscanf(line, "SleepAVG: %d%%",
560 &(curtask->cpu.sleepavg));
561 continue;
562 }
563
564 if (memcmp(line, "Uid:", 4)==0)
565 {
566 sscanf(line, "Uid: %d %d %d %d",
567 &(curtask->gen.ruid), &(curtask->gen.euid),
568 &(curtask->gen.suid), &(curtask->gen.fsuid));
569 continue;
570 }
571
572 if (memcmp(line, "Gid:", 4)==0)
573 {
574 sscanf(line, "Gid: %d %d %d %d",
575 &(curtask->gen.rgid), &(curtask->gen.egid),
576 &(curtask->gen.sgid), &(curtask->gen.fsgid));
577 continue;
578 }
579
580 if (memcmp(line, "envID:", 6) ==0)
581 {
582 sscanf(line, "envID: %d", &(curtask->gen.ctid));
583 continue;
584 }
585
586 if (memcmp(line, "VPid:", 5) ==0)
587 {
588 sscanf(line, "VPid: %d", &(curtask->gen.vpid));
589 continue;
590 }
591
592 if (memcmp(line, "Threads:", 8)==0)
593 {
594 sscanf(line, "Threads: %d", &(curtask->gen.nthr));
595 continue;
596 }
597
598 if (memcmp(line, "VmData:", 7)==0)
599 {
600 sscanf(line, "VmData: %lld", &(curtask->mem.vdata));
601 continue;
602 }
603
604 if (memcmp(line, "VmStk:", 6)==0)
605 {
606 sscanf(line, "VmStk: %lld", &(curtask->mem.vstack));
607 continue;
608 }
609
610 if (memcmp(line, "VmExe:", 6)==0)
611 {
612 sscanf(line, "VmExe: %lld", &(curtask->mem.vexec));
613 continue;
614 }
615
616 if (memcmp(line, "VmLib:", 6)==0)
617 {
618 sscanf(line, "VmLib: %lld", &(curtask->mem.vlibs));
619 continue;
620 }
621
622 if (memcmp(line, "VmSwap:", 7)==0)
623 {
624 sscanf(line, "VmSwap: %lld", &(curtask->mem.vswap));
625 continue;
626 }
627
628 if (memcmp(line, "VmLck:", 6)==0)
629 {
630 sscanf(line, "VmLck: %lld", &(curtask->mem.vlock));
631 continue;
632 }
633
634 if (memcmp(line, "SigQ:", 5)==0)
635 break;
636 }
637
638 fclose(fp);
639 return 1;
640 }
641
642 /*
643 ** open file "io" (>= 2.6.20) and obtain required info
644 */
645 #define IO_READ "read_bytes:"
646 #define IO_WRITE "write_bytes:"
647 #define IO_CWRITE "cancelled_write_bytes:"
648 static int
649 procio(struct tstat *curtask)
650 {
651 FILE *fp;
652 char line[4096];
653 count_t dskrsz=0, dskwsz=0, dskcwsz=0;
654
655 if (supportflags & IOSTAT)
656 {
657 regainrootprivs();
658
659 if ( (fp = fopen("io", "r")) )
660 {
661 while (fgets(line, sizeof line, fp))
662 {
663 if (memcmp(line, IO_READ,
664 sizeof IO_READ -1) == 0)
665 {
666 sscanf(line, "%*s %llu", &dskrsz);
667 dskrsz /= 512; // in sectors
668 continue;
669 }
670
671 if (memcmp(line, IO_WRITE,
672 sizeof IO_WRITE -1) == 0)
673 {
674 sscanf(line, "%*s %llu", &dskwsz);
675 dskwsz /= 512; // in sectors
676 continue;
677 }
678
679 if (memcmp(line, IO_CWRITE,
680 sizeof IO_CWRITE -1) == 0)
681 {
682 sscanf(line, "%*s %llu", &dskcwsz);
683 dskcwsz /= 512; // in sectors
684 continue;
685 }
686 }
687
688 fclose(fp);
689
690 curtask->dsk.rsz = dskrsz;
691 curtask->dsk.rio = dskrsz; // to enable sort
692 curtask->dsk.wsz = dskwsz;
693 curtask->dsk.wio = dskwsz; // to enable sort
694 curtask->dsk.cwsz = dskcwsz;
695 }
696
697 if (! droprootprivs())
698 mcleanstop(42, "failed to drop root privs\n");
699 }
700
701 return 1;
702 }
703
704 /*
705 ** store the full command line; the command-line may contain:
706 ** - null-bytes as a separator between the arguments
707 ** - newlines (e.g. arguments for awk or sed)
708 ** - tabs (e.g. arguments for awk or sed)
709 ** these special bytes will be converted to spaces
710 */
711 static void
712 proccmd(struct tstat *curtask)
713 {
714 FILE *fp;
715 register int i, nr;
716
717 memset(curtask->gen.cmdline, 0, CMDLEN+1);
718
719 if ( (fp = fopen("cmdline", "r")) != NULL)
720 {
721 register char *p = curtask->gen.cmdline;
722
723 nr = fread(p, 1, CMDLEN, fp);
724 fclose(fp);
725
726 if (nr >= 0) /* anything read ? */
727 {
728 for (i=0; i < nr-1; i++, p++)
729 {
730 switch (*p)
731 {
732 case '\0':
733 case '\n':
734 case '\t':
735 *p = ' ';
736 }
737 }
738 }
739 }
740 }
741
742
743 /*
744 ** determine the wait channel of a sleeping thread
745 ** i.e. the name of the kernel function in which the thread
746 ** has been put in sleep state)
747 */
748 static void
749 procwchan(struct tstat *curtask)
750 {
751 FILE *fp;
752 register int nr = 0;
753
754 if ( (fp = fopen("wchan", "r")) != NULL)
755 {
756
757 nr = fread(curtask->cpu.wchan, 1,
758 sizeof(curtask->cpu.wchan)-1, fp);
759 if (nr < 0)
760 nr = 0;
761 fclose(fp);
762 }
763
764 curtask->cpu.wchan[nr] = 0;
765 }
766
767
768 /*
769 ** store the container ID, retrieved from the 'cpuset'
770 ** that might look like this:
771 **
772 ** In case of Docker:
773 ** /system.slice/docker-af78216c2a230f1aa5dce56cbf[SNAP].scope (e.g. CentOS)
774 ** /docker/af78216c2a230f1aa5dce56cbf[SNAP] (e.g. openSUSE and Ubuntu))
775 **
776 ** In case of Docker created by K8s:
777 ** /kubepods/burstable/pod07dbb922-[SNAP]/223dc5e15b[SNAP]
778 **
779 ** In case of podman:
780 ** /machine.slice/libpod-0b5836e9ea98aefd89481123bi[SNAP].scope
781 **
782 ** In general:
783 ** - search for last '/' (basename)
784 ** - check if '/' followed by 'docker-': then skip 'docker-'
785 ** - check if '/' followed by 'libpod-': then skip 'libpod-'
786 ** - take 12 positions for the container ID
787 **
788 ** Return value:
789 ** 0 - no container
790 ** 1 - container
791 */
792 #define CIDSIZE 12
793 #define SHA256SIZE 64
794 #define DOCKPREFIX "docker-"
795 #define PODMANPREFIX "libpod-"
796
797 static int
798 proccont(struct tstat *curtask)
799 {
800 FILE *fp;
801 char line[256];
802
803 if ( (fp = fopen("cpuset", "r")) != NULL)
804 {
805 register char *p;
806
807 if ( fgets(line, sizeof line, fp) )
808 {
809 fclose(fp);
810
811 // fast check for processes not using cpuset
812 // i.e. anyhow not container
813 if (memcmp(line, "/\n", 3) == 0)
814 return 0;
815
816 // possibly container: find basename in path and
817 // verify that its minimum length is the size of SHA256
818 if ( (p = strrchr(line, '/')) != NULL &&
819 strlen(p) >= SHA256SIZE)
820 {
821 p++;
822
823 if (memcmp(p, DOCKPREFIX,
824 sizeof(DOCKPREFIX)-1) == 0)
825 {
826 p += sizeof(DOCKPREFIX)-1;
827 }
828 else
829 {
830 if (memcmp(p, PODMANPREFIX,
831 sizeof(PODMANPREFIX)-1) == 0)
832 {
833 p += sizeof(PODMANPREFIX)-1;
834 }
835 }
836
837 memcpy(curtask->gen.container, p, CIDSIZE);
838 return 1;
839 }
840 }
841 else
842 {
843 fclose(fp);
844 }
845 }
846
847 return 0;
848 }
849
850
851 /*
852 ** open file "smaps" and obtain required info
853 ** since Linux-4.14, kernel supports "smaps_rollup" which has better
854 ** performence. check "smaps_rollup" in first call
855 ** if kernel supports "smaps_rollup", use "smaps_rollup" instead
856 */
857 static void
858 procsmaps(struct tstat *curtask)
859 {
860 FILE *fp;
861 char line[4096];
862 count_t pssval;
863 static int procsmaps_firstcall = 1;
864 static char *smapsfile = "smaps";
865
866 if (procsmaps_firstcall)
867 {
868 regainrootprivs();
869 if ( (fp = fopen("/proc/1/smaps_rollup", "r")) )
870 {
871 smapsfile = "smaps_rollup";
872 fclose(fp);
873 }
874
875 procsmaps_firstcall = 0;
876 }
877
878 /*
879 ** open the file (always succeeds, even if no root privs)
880 */
881 regainrootprivs();
882
883 if ( (fp = fopen(smapsfile, "r")) )
884 {
885 curtask->mem.pmem = 0;
886
887 while (fgets(line, sizeof line, fp))
888 {
889 if (memcmp(line, "Pss:", 4) != 0)
890 continue;
891
892 // PSS line found to be accumulated
893 sscanf(line, "Pss: %llu", &pssval);
894 curtask->mem.pmem += pssval;
895 }
896
897 /*
898 ** verify if fgets returned NULL due to error i.s.o. EOF
899 */
900 if (ferror(fp))
901 curtask->mem.pmem = (unsigned long long)-1LL;
902
903 fclose(fp);
904 }
905 else
906 {
907 curtask->mem.pmem = (unsigned long long)-1LL;
908 }
909
910 if (! droprootprivs())
911 mcleanstop(42, "failed to drop root privs\n");
912 }
913
914 /*
915 ** get run_delay from /proc/<pid>/schedstat
916 ** ref: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/scheduler/sched-stats.rst?h=v5.7-rc6
917 */
918 static count_t
919 procschedstat(struct tstat *curtask)
920 {
921 FILE *fp;
922 char line[4096];
923 count_t runtime, rundelay = 0;
924 unsigned long pcount;
925 static char *schedstatfile = "schedstat";
926
927 /*
928 ** open the schedstat file
929 */
930 if ( (fp = fopen(schedstatfile, "r")) )
931 {
932 curtask->cpu.rundelay = 0;
933
934 if (fgets(line, sizeof line, fp))
935 {
936 sscanf(line, "%llu %llu %lu\n",
937 &runtime, &rundelay, &pcount);
938
939 curtask->cpu.rundelay = rundelay;
940 }
941
942 /*
943 ** verify if fgets returned NULL due to error i.s.o. EOF
944 */
945 if (ferror(fp))
946 curtask->cpu.rundelay = 0;
947
948 fclose(fp);
949 }
950 else
951 {
952 curtask->cpu.rundelay = 0;
953 }
954
955 return curtask->cpu.rundelay;
956 }
957
/*
** CGROUP V2 specific items
*/
#define	CGROUPROOT	"/sys/fs/cgroup"  // prefix for absolute cgroup path
#define	CGROUPNHASH	64		  // number of hash buckets
#define	CGROUPMASK	(CGROUPNHASH-1)	  // mask to get bucket from hash
#define	MAXSLASH	16		  // max. number of slashes in
					  // relative path

/*
** values of one cgroup, cached in a hash list
**
** the fields with the suffix 'r' contain the restrictive value:
** initially equal to the own value of the cgroup and possibly
** lowered by proccgroupv2() to the value of a higher level cgroup
*/
struct cgroupv2vals {
	char			*path;		// relative path of cgroup
						// (malloc'ed)

	int			cpuweight;	// -1=max, -2=undefined

	int			cpumax;		// -1=max, -2=undefined (perc)
	int			cpumaxr;	// -1=max, -2=undefined (perc)

	long long		memmax;		// -1=max, -2=undefined (KiB)
	long long		memmaxr;	// -1=max, -2=undefined (KiB)

	long long		swpmax;		// -1=max, -2=undefined (KiB)
	long long		swpmaxr;	// -1=max, -2=undefined (KiB)

	struct cgroupv2vals	*next;		// next entry in hash chain
};

// cache of cgroup values: every bucket is the head of a linked list
static struct cgroupv2vals	*cgrouphash[CGROUPNHASH];
984
985 /*
986 ** get cgroup related to process from /proc/<pid>/cgroup
987 ** return code: 0 - no cgroup v2 in use
988 ** 1 - cgroup v2 in use
989 ** 2 - cgroup version can not be determined
990 */
991 static int
992 proccgroupv2(struct tstat *curtask)
993 {
994 FILE *fp;
995 char line[1024], *relpath, abspath[1200];
996 int hash, pathlen, restlen, nslash;
997 struct cgroupv2vals *pvals = NULL, *ptarget;
998 char *p, *slashes[MAXSLASH];
999
1000 /*
1001 ** open the cgroup file of the current process and
1002 ** read one line that should start with '0::' for cgroup v2
1003 */
1004 if ( (fp = fopen("cgroup", "r")) )
1005 {
1006 if (fgets(line, sizeof line, fp))
1007 {
1008 if ( memcmp(line, "0::", 3) ) // unequal?
1009 {
1010 fclose(fp);
1011 curtask->gen.cgpath[0] = '\0';
1012 return 0; // no cgroupv2 support
1013 }
1014 }
1015 fclose(fp);
1016
1017 line[ strlen(line)-1 ] = '\0'; // remove newline
1018
1019 relpath = line+3;
1020
1021 strncpy(curtask->gen.cgpath, relpath,
1022 sizeof curtask->gen.cgpath);
1023 curtask->gen.cgpath[sizeof curtask->gen.cgpath -1] = '\0';
1024 }
1025 else // open failed; no permission
1026 {
1027 curtask->gen.cgpath[0] = '\0';
1028 return 2;
1029 }
1030
1031 /*
1032 ** cgroup v2 pathname of this process is known;
1033 ** prepare absolute pathname of cgroup
1034 */
1035 pathlen = snprintf(abspath, sizeof abspath, "%s%s/",
1036 CGROUPROOT, relpath);
1037 restlen = sizeof abspath - pathlen -1;
1038 abspath[sizeof abspath - 1] = '\0'; // guarantee delimiter
1039 relpath = abspath + sizeof CGROUPROOT - 1;
1040
1041 /*
1042 ** cycle through all directory levels for values that
1043 ** might limit the values in the current cgroup (e.g. cpu.max)
1044 */
1045 for (nslash=0, p=relpath; *p && nslash<MAXSLASH; p++)
1046 {
1047 if (*p == '/') // find all slashes in path
1048 slashes[nslash++] = p;
1049 }
1050
1051 for (nslash--, ptarget=NULL; nslash > 0; nslash--)
1052 {
1053 *slashes[nslash] = '\0';
1054
1055 pvals = findhashcgroupv2(relpath, &hash); // search in cache
1056
1057 if (!pvals) // not found in cache
1058 {
1059 // allocate new cache entry
1060 pvals = alloccgroupv2(relpath, hash);
1061
1062 // fill info in new cache entry
1063 fillcgroupv2(pvals, abspath, slashes[nslash],
1064 restlen);
1065 }
1066
1067
1068 /*
1069 ** if the target cgroup is not defined,
1070 ** determine the target cgroup (lowest level dir)
1071 */
1072 if (! ptarget)
1073 {
1074 ptarget = pvals;
1075 continue;
1076 }
1077
1078 /*
1079 ** in case of a higher cgroup, check the restrictive values
1080 ** for the target cgroup
1081 */
1082 switch (pvals->cpumax)
1083 {
1084 case -1:
1085 if (ptarget->cpumaxr == -2)
1086 ptarget->cpumaxr = -1;
1087 break;
1088 case -2:
1089 break;
1090 default:
1091 if (ptarget->cpumaxr == -1 || ptarget->cpumaxr == -2)
1092 {
1093 ptarget->cpumaxr = pvals->cpumax;
1094 break;
1095 }
1096
1097 if (ptarget->cpumaxr > pvals->cpumax)
1098 ptarget->cpumaxr = pvals->cpumax;
1099 }
1100
1101 switch (pvals->memmax)
1102 {
1103 case -1:
1104 if (ptarget->memmaxr == -2)
1105 ptarget->memmaxr = -1;
1106 break;
1107 case -2:
1108 break;
1109 default:
1110 if (ptarget->memmaxr == -1 || ptarget->memmaxr == -2)
1111 {
1112 ptarget->memmaxr = pvals->memmax;
1113 break;
1114 }
1115
1116 if (ptarget->memmaxr > pvals->memmax)
1117 ptarget->memmaxr = pvals->memmax;
1118 }
1119
1120 switch (pvals->swpmax)
1121 {
1122 case -1:
1123 if (ptarget->swpmaxr == -2)
1124 ptarget->swpmaxr = -1;
1125 break;
1126 case -2:
1127 break;
1128 default:
1129 if (ptarget->swpmaxr == -1 || ptarget->swpmaxr == -2)
1130 {
1131 ptarget->swpmaxr = pvals->swpmax;
1132 break;
1133 }
1134
1135 if (ptarget->swpmaxr > pvals->swpmax)
1136 ptarget->swpmaxr = pvals->swpmax;
1137 }
1138
1139
1140 }
1141
1142 curtask->cpu.cgcpuweight = ptarget->cpuweight;
1143
1144 curtask->cpu.cgcpumax = ptarget->cpumax;
1145 curtask->cpu.cgcpumaxr = ptarget->cpumaxr;
1146
1147 curtask->mem.cgmemmax = ptarget->memmax;
1148 curtask->mem.cgmemmaxr = ptarget->memmaxr;
1149
1150 curtask->mem.cgswpmax = ptarget->swpmax;
1151 curtask->mem.cgswpmaxr = ptarget->swpmaxr;
1152
1153 return 1;
1154 }
1155
1156 /*
1157 ** determine the most relevant values of this cgroup
1158 */
1159 void
1160 fillcgroupv2(struct cgroupv2vals *pvals, char *abspath, char *extpath,
1161 int restlen)
1162 {
1163 long retvals[2];
1164
1165 *extpath++ = '/'; // replace slash
1166
1167 /*
1168 ** get cpu.weight limitation
1169 */
1170 pvals->cpuweight = -2; // initial value (undefined)
1171
1172 switch (readcgroupv2(abspath, extpath, "cpu.weight", restlen, retvals))
1173 {
1174 case 1:
1175 pvals->cpuweight = retvals[0];
1176 break;
1177 }
1178
1179 /*
1180 ** get cpu.max limitation
1181 */
1182 pvals->cpumax = -2; // initial value (undefined)
1183
1184 switch (readcgroupv2(abspath, extpath, "cpu.max", restlen, retvals))
1185 {
1186 case 2:
1187 if (retvals[0] == -1)
1188 pvals->cpumax = -1;
1189 else
1190 pvals->cpumax = retvals[0] * 100 / retvals[1];
1191 break;
1192 }
1193
1194 pvals->cpumaxr = pvals->cpumax; // set temporary restrictive
1195
1196 /*
1197 ** get memory.max limitation
1198 */
1199 pvals->memmax = -2; // initial value (undefined)
1200
1201 switch (readcgroupv2(abspath, extpath, "memory.max", restlen, retvals))
1202 {
1203 case 1:
1204 if (retvals[0] == -1)
1205 pvals->memmax = -1;
1206 else
1207 pvals->memmax = retvals[0] / 1024; // KiB
1208 break;
1209 }
1210
1211 pvals->memmaxr = pvals->memmax; // set temporary restrictive
1212
1213 /*
1214 ** get memory.swap.max limitation
1215 */
1216 pvals->swpmax = -2; // initial value (undefined)
1217
1218 switch (readcgroupv2(abspath, extpath, "memory.swap.max", restlen, retvals))
1219 {
1220 case 1:
1221 if (retvals[0] == -1)
1222 pvals->swpmax = -1;
1223 else
1224 pvals->swpmax = retvals[0] / 1024; // KiB
1225 break;
1226 }
1227
1228 pvals->swpmaxr = pvals->swpmax; // set temporary restrictive
1229 }
1230
/*
** read line with one or two values
** and fill one or (maximum) two values
** in retvals (value 'max' converted into -1)
**
** abspath	buffer that contains the absolute path name of the
**		cgroup directory (with slash at the end)
** extpath	position in abspath where the file name has to be stored
** fname	name of the file to be read
** restlen	number of bytes available from extpath onwards
** retvals	array in which the value(s) will be stored
**
** return value: number of entries in retvals filled; zero when
**               the file name does not fit, when the file can not
**               be read or when the first value is neither 'max'
**               nor a decimal number
*/
int
readcgroupv2(char *abspath, char *extpath, char *fname, int restlen,
							long retvals[])
{
	char	line[64], firststr[32], *endp;
	long	secondval;
	int	n;
	FILE	*fp;

	/*
	** complete absolute path of file; a truncated file name is
	** useless, so verify that name and terminator fit
	*/
	if (restlen <= 0 || strlen(fname) >= (size_t)restlen)
		return 0;

	strcpy(extpath, fname);

	if ( (fp = fopen(abspath, "r")) == NULL)
		return 0;

	if (! fgets(line, sizeof line, fp))
	{
		fclose(fp);
		return 0;
	}

	fclose(fp);

	/*
	** the first value is read as a string, because it might be
	** the word 'max'; the buffer is large enough for every value
	** that fits in a long
	*/
	n = sscanf(line, "%31s %ld", firststr, &secondval);

	if (n != 1 && n != 2)
		return 0;

	if ( strcmp(firststr, "max") == 0)
	{
		retvals[0] = -1;
	}
	else
	{
		// accept the string only when it is entirely numerical
		retvals[0] = strtol(firststr, &endp, 10);

		if (endp == firststr || *endp != '\0')
			return 0;
	}

	if (n == 2)
		retvals[1] = secondval;

	return n;
}
1279
1280 /*
1281 ** find existing info about cgroup in cgroupv2 cache
1282 **
1283 ** return value: pointer to structure that has been found, or
1284 ** NULL (not found)
1285 */
1286 static struct cgroupv2vals *
1287 findhashcgroupv2(char *relpath, int *phash)
1288 {
1289 struct cgroupv2vals *p;
1290 char *s;
1291 int hash = 0;
1292
1293 for (s = relpath; *s++; ) // calculate simple hash for this cgroup
1294 hash += *s; // by accumulating all path characters
1295
1296 *phash = hash;
1297
1298 // search hash list of earlier accessed cgroups within this interval
1299 for (p = cgrouphash[hash&CGROUPMASK]; p; p = p->next)
1300 {
1301 if ( strcmp(relpath, p->path) == 0) // found?
1302 return p;
1303 }
1304
1305 return NULL;
1306 }
1307
1308 /*
1309 ** allocate new hash entry in cgroupv2 cache
1310 **
1311 ** return value: pointer to newly allocated structure
1312 */
1313 static struct cgroupv2vals *
1314 alloccgroupv2(char *relpath, int hash)
1315 {
1316 struct cgroupv2vals *p;
1317
1318 p = malloc(sizeof(struct cgroupv2vals));
1319
1320 ptrverify(p, "Malloc failed for new cgroup values\n");
1321
1322 p->path = malloc(strlen(relpath)+1);
1323
1324 ptrverify(p->path, "Malloc failed for path in new cgroup values\n");
1325
1326 strcpy(p->path, relpath);
1327
1328 p->next = cgrouphash[hash&CGROUPMASK]; // add new entry to hash chain
1329 cgrouphash[hash&CGROUPMASK] = p;
1330
1331 return p;
1332 }
1333
1334 /*
1335 ** clear entire cgroupv2 cache
1336 */
1337 static void
1338 wipecgroupv2(void)
1339 {
1340 int i;
1341 struct cgroupv2vals *p, *pnext;
1342
1343 for (i=0; i < CGROUPNHASH; i++)
1344 {
1345 for (p = cgrouphash[i]; p; p = pnext)
1346 {
1347 pnext = p->next;
1348 free(p->path);
1349 free(p);
1350 }
1351
1352 cgrouphash[i] = 0;
1353 }
1354 }