From 7fa47a962a6415748566f71f453378644de2cc69 Mon Sep 17 00:00:00 2001 From: Ingo Oppermann Date: Tue, 16 Jul 2024 08:14:19 +0200 Subject: [PATCH] Add basic nvidia-smi parser --- psutil/gpu/gpu.go | 33 + psutil/gpu/nvidia/fixtures/data1.xml | 725 ++++++++++++++++++++++ psutil/gpu/nvidia/fixtures/data2.xml | 890 +++++++++++++++++++++++++++ psutil/gpu/nvidia/fixtures/data3.xml | 242 ++++++++ psutil/gpu/nvidia/nvidia.go | 284 +++++++++ psutil/gpu/nvidia/nvidia_test.go | 102 +++ psutil/psutil.go | 55 ++ resources/resources_test.go | 8 + 8 files changed, 2339 insertions(+) create mode 100644 psutil/gpu/gpu.go create mode 100644 psutil/gpu/nvidia/fixtures/data1.xml create mode 100644 psutil/gpu/nvidia/fixtures/data2.xml create mode 100644 psutil/gpu/nvidia/fixtures/data3.xml create mode 100644 psutil/gpu/nvidia/nvidia.go create mode 100644 psutil/gpu/nvidia/nvidia_test.go diff --git a/psutil/gpu/gpu.go b/psutil/gpu/gpu.go new file mode 100644 index 00000000..7feb19bd --- /dev/null +++ b/psutil/gpu/gpu.go @@ -0,0 +1,33 @@ +package gpu + +import "errors" + +type Process struct { + PID int32 + Memory uint64 +} + +type Stats struct { + Name string + Architecture string + + MemoryTotal uint64 + MemoryUsed uint64 + + Usage float64 + MemoryUsage float64 + EncoderUsage float64 + DecoderUsage float64 + + Process []Process + + Extension interface{} +} + +type GPU interface { + Count() (int, error) + Stats() ([]Stats, error) + Process(pid int32) (Process, error) +} + +var ErrProcessNotFound = errors.New("process not found") diff --git a/psutil/gpu/nvidia/fixtures/data1.xml b/psutil/gpu/nvidia/fixtures/data1.xml new file mode 100644 index 00000000..10b35010 --- /dev/null +++ b/psutil/gpu/nvidia/fixtures/data1.xml @@ -0,0 +1,725 @@ + + + + Mon Jul 15 13:50:34 2024 + 495.29.05 + 11.5 + 1 + + NVIDIA GeForce GTX 1080 + GeForce + Pascal + Disabled + Disabled + Disabled + + N/A + N/A + + + None + + Disabled + 4000 + + N/A + N/A + + N/A + GPU-d8249424-2ed0-0499-2d47-8c6905e3ef5b + 0 + 86.04.17.00.01 + No + 0x100 + N/A + 0 + + G001.0000.01.03 + 1.1 + N/A + N/A + + + N/A + N/A + + N/A + + None + N/A + + + N/A + + + 01 + 00 + 0000 + 1B8010DE + 00000000:01:00.0 + 119E10DE + + + 3 + 3 + + + 16x + 16x + + + + N/A + N/A + + 0 + 0 + 106000 KB/s + 309000 KB/s + + 44 % + P2 + + Not Active + Not Active + Not Active + Not Active + Not Active + Not Active + Not Active + Not Active + Not Active + + + 8119 MiB + 918 MiB + 7201 MiB + + + 256 MiB + 2 MiB + 254 MiB + + Default + + 15 % + 7 % + 3 % + 0 % + + + 0 + 0 + 0 + + + 0 + 0 + 0 + + + N/A + N/A + + + + + N/A + N/A + N/A + N/A + N/A + N/A + N/A + N/A + + + N/A + N/A + N/A + N/A + N/A + N/A + N/A + N/A + + + + + N/A + N/A + N/A + N/A + N/A + N/A + N/A + N/A + + + N/A + N/A + N/A + N/A + N/A + N/A + N/A + N/A + + + + + + N/A + N/A + + + N/A + N/A + + N/A + N/A + + N/A + + 55 C + 99 C + 96 C + N/A + 83 C + N/A + N/A + + + 60 C + 92 C + + + P2 + Supported + 42.64 W + 180.00 W + 180.00 W + 180.00 W + 90.00 W + 180.00 W + + + 1607 MHz + 1607 MHz + 4513 MHz + 1442 MHz + + + N/A + N/A + + + N/A + N/A + + + 1911 MHz + 1911 MHz + 5005 MHz + 1708 MHz + + + N/A + + + N/A + N/A + + + N/A + + + + 5005 MHz + 1911 MHz + 1898 MHz + 1885 MHz + 1873 MHz + 1860 MHz + 1847 MHz + 1835 MHz + 1822 MHz + 1809 MHz + 1797 MHz + 1784 MHz + 1771 MHz + 1759 MHz + 1746 MHz + 1733 MHz + 1721 MHz + 1708 MHz + 1695 MHz + 1683 MHz + 1670 MHz + 1657 MHz + 1645 MHz + 1632 MHz + 1620 MHz + 1607 MHz + 1594 MHz + 1582 MHz + 1569 MHz + 1556 MHz + 1544 MHz + 1531 MHz + 1518 MHz + 1506 MHz + 1493 MHz + 1480 MHz + 1468 MHz + 1455 MHz + 1442 MHz + 1430 MHz + 1417 MHz + 1404 MHz + 1392 MHz + 1379 MHz + 1366 MHz + 1354 MHz + 1341 MHz + 1328 MHz + 1316 MHz + 1303 MHz + 1290 MHz + 1278 MHz + 1265 MHz + 1252 MHz + 1240 MHz + 1227 MHz + 1215 MHz + 1202 MHz + 1189 MHz + 1177 MHz + 1164 MHz + 1151 MHz + 1139 MHz + 1126 MHz + 1113 MHz + 1101 MHz + 1088 MHz + 1075 MHz + 1063 MHz + 1050 MHz + 1037 MHz + 1025 MHz + 1012 MHz + 999 MHz + 987 MHz + 974 MHz + 961 MHz + 949 MHz + 936 MHz + 923 MHz + 911 MHz + 898 MHz + 885 MHz + 873 MHz + 860 MHz + 847 MHz + 835 MHz + 822 MHz + 810 MHz + 797 MHz + 784 MHz + 772 MHz + 759 MHz + 746 MHz + 734 MHz + 721 MHz + 708 MHz + 696 MHz + 683 MHz + 670 MHz + 658 MHz + 645 MHz + 632 MHz + 620 MHz + 607 MHz + 594 MHz + 582 MHz + 569 MHz + 556 MHz + 544 MHz + 531 MHz + 518 MHz + 506 MHz + 493 MHz + 480 MHz + 468 MHz + 455 MHz + 442 MHz + 430 MHz + 417 MHz + 405 MHz + 392 MHz + 379 MHz + 367 MHz + 354 MHz + 341 MHz + 329 MHz + 316 MHz + 303 MHz + 291 MHz + 278 MHz + 265 MHz + 253 MHz + 240 MHz + 227 MHz + 215 MHz + 202 MHz + 189 MHz + 177 MHz + 164 MHz + 151 MHz + 139 MHz + + + 4513 MHz + 1911 MHz + 1898 MHz + 1885 MHz + 1873 MHz + 1860 MHz + 1847 MHz + 1835 MHz + 1822 MHz + 1809 MHz + 1797 MHz + 1784 MHz + 1771 MHz + 1759 MHz + 1746 MHz + 1733 MHz + 1721 MHz + 1708 MHz + 1695 MHz + 1683 MHz + 1670 MHz + 1657 MHz + 1645 MHz + 1632 MHz + 1620 MHz + 1607 MHz + 1594 MHz + 1582 MHz + 1569 MHz + 1556 MHz + 1544 MHz + 1531 MHz + 1518 MHz + 1506 MHz + 1493 MHz + 1480 MHz + 1468 MHz + 1455 MHz + 1442 MHz + 1430 MHz + 1417 MHz + 1404 MHz + 1392 MHz + 1379 MHz + 1366 MHz + 1354 MHz + 1341 MHz + 1328 MHz + 1316 MHz + 1303 MHz + 1290 MHz + 1278 MHz + 1265 MHz + 1252 MHz + 1240 MHz + 1227 MHz + 1215 MHz + 1202 MHz + 1189 MHz + 1177 MHz + 1164 MHz + 1151 MHz + 1139 MHz + 1126 MHz + 1113 MHz + 1101 MHz + 1088 MHz + 1075 MHz + 1063 MHz + 1050 MHz + 1037 MHz + 1025 MHz + 1012 MHz + 999 MHz + 987 MHz + 974 MHz + 961 MHz + 949 MHz + 936 MHz + 923 MHz + 911 MHz + 898 MHz + 885 MHz + 873 MHz + 860 MHz + 847 MHz + 835 MHz + 822 MHz + 810 MHz + 797 MHz + 784 MHz + 772 MHz + 759 MHz + 746 MHz + 734 MHz + 721 MHz + 708 MHz + 696 MHz + 683 MHz + 670 MHz + 658 MHz + 645 MHz + 632 MHz + 620 MHz + 607 MHz + 594 MHz + 582 MHz + 569 MHz + 556 MHz + 544 MHz + 531 MHz + 518 MHz + 506 MHz + 493 MHz + 480 MHz + 468 MHz + 455 MHz + 442 MHz + 430 MHz + 417 MHz + 405 MHz + 392 MHz + 379 MHz + 367 MHz + 354 MHz + 341 MHz + 329 MHz + 316 MHz + 303 MHz + 291 MHz + 278 MHz + 265 MHz + 253 MHz + 240 MHz + 227 MHz + 215 MHz + 202 MHz + 189 MHz + 177 MHz + 164 MHz + 151 MHz + 139 MHz + + + 810 MHz + 1911 MHz + 1898 MHz + 1885 MHz + 1873 MHz + 1860 MHz + 1847 MHz + 1835 MHz + 1822 MHz + 1809 MHz + 1797 MHz + 1784 MHz + 1771 MHz + 1759 MHz + 1746 MHz + 1733 MHz + 1721 MHz + 1708 MHz + 1695 MHz + 1683 MHz + 1670 MHz + 1657 MHz + 1645 MHz + 1632 MHz + 1620 MHz + 1607 MHz + 1594 MHz + 1582 MHz + 1569 MHz + 1556 MHz + 1544 MHz + 1531 MHz + 1518 MHz + 1506 MHz + 1493 MHz + 1480 MHz + 1468 MHz + 1455 MHz + 1442 MHz + 1430 MHz + 1417 MHz + 1404 MHz + 1392 MHz + 1379 MHz + 1366 MHz + 1354 MHz + 1341 MHz + 1328 MHz + 1316 MHz + 1303 MHz + 1290 MHz + 1278 MHz + 1265 MHz + 1252 MHz + 1240 MHz + 1227 MHz + 1215 MHz + 1202 MHz + 1189 MHz + 1177 MHz + 1164 MHz + 1151 MHz + 1139 MHz + 1126 MHz + 1113 MHz + 1101 MHz + 1088 MHz + 1075 MHz + 1063 MHz + 1050 MHz + 1037 MHz + 1025 MHz + 1012 MHz + 999 MHz + 987 MHz + 974 MHz + 961 MHz + 949 MHz + 936 MHz + 923 MHz + 911 MHz + 898 MHz + 885 MHz + 873 MHz + 860 MHz + 847 MHz + 835 MHz + 822 MHz + 810 MHz + 797 MHz + 784 MHz + 772 MHz + 759 MHz + 746 MHz + 734 MHz + 721 MHz + 708 MHz + 696 MHz + 683 MHz + 670 MHz + 658 MHz + 645 MHz + 632 MHz + 620 MHz + 607 MHz + 594 MHz + 582 MHz + 569 MHz + 556 MHz + 544 MHz + 531 MHz + 518 MHz + 506 MHz + 493 MHz + 480 MHz + 468 MHz + 455 MHz + 442 MHz + 430 MHz + 417 MHz + 405 MHz + 392 MHz + 379 MHz + 367 MHz + 354 MHz + 341 MHz + 329 MHz + 316 MHz + 303 MHz + 291 MHz + 278 MHz + 265 MHz + 253 MHz + 240 MHz + 227 MHz + 215 MHz + 202 MHz + 189 MHz + 177 MHz + 164 MHz + 151 MHz + 139 MHz + + + 405 MHz + 607 MHz + 594 MHz + 582 MHz + 569 MHz + 556 MHz + 544 MHz + 531 MHz + 518 MHz + 506 MHz + 493 MHz + 480 MHz + 468 MHz + 455 MHz + 442 MHz + 430 MHz + 417 MHz + 405 MHz + 392 MHz + 379 MHz + 367 MHz + 354 MHz + 341 MHz + 329 MHz + 316 MHz + 303 MHz + 291 MHz + 278 MHz + 265 MHz + 253 MHz + 240 MHz + 227 MHz + 215 MHz + 202 MHz + 189 MHz + 177 MHz + 164 MHz + 151 MHz + 139 MHz + + + + + N/A + N/A + 18179 + C + /usr/local/bin/ffmpeg + 916 MiB + + + + + + + \ No newline at end of file diff --git a/psutil/gpu/nvidia/fixtures/data2.xml b/psutil/gpu/nvidia/fixtures/data2.xml new file mode 100644 index 00000000..cd45d707 --- /dev/null +++ b/psutil/gpu/nvidia/fixtures/data2.xml @@ -0,0 +1,890 @@ + + + + Mon Jul 15 13:41:56 2024 + 555.42.06 + 12.5 + 2 + + NVIDIA L4 + NVIDIA + Ada Lovelace + Enabled + Disabled + Disabled + None + + N/A + N/A + + + None + + Disabled + 4000 + + N/A + N/A + + 1654523003308 + GPU-c5533cd4-5a60-059e-348d-b6d7466932e4 + 1 + 95.04.29.00.06 + No + 0x100 + 900-2G193-0000-001 + 27B8-895-A1 + N/A + 1 + + G193.0200.00.01 + 2.1 + 6.16 + N/A + + + N/A + N/A + + + N/A + N/A + + N/A + + None + N/A + N/A + + + No + N/A + + 555.42.06 + + N/A + + + 01 + 00 + 0000 + 3 + 2 + 27B810DE + 00000000:01:00.0 + 16CA10DE + + + 4 + 4 + 4 + 4 + 5 + + + 16x + 16x + + + + N/A + N/A + + 0 + 0 + 0 KB/s + 0 KB/s + N/A + N/A + + N/A + P0 + + Active + Not Active + Not Active + Not Active + Not Active + Not Active + Not Active + Not Active + Not Active + + N/A + + 23034 MiB + 434 MiB + 1 MiB + 22601 MiB + + + 32768 MiB + 1 MiB + 32767 MiB + + + 0 MiB + 0 MiB + 0 MiB + + Default + + 2 % + 0 % + 0 % + 0 % + 0 % + 0 % + + + 0 + 0 + 0 + + + 0 + 0 + 0 + + + Enabled + Enabled + + + + 0 + 0 + 0 + 0 + 0 + + + 0 + 0 + 0 + 0 + 0 + No + + + 0 + 0 + 0 + 0 + 0 + + + + + N/A + N/A + + + N/A + N/A + + N/A + N/A + + + 0 + 0 + No + No + + 96 bank(s) + 0 bank(s) + 0 bank(s) + 0 bank(s) + 0 bank(s) + + + + 45 C + 39 C + -5 C + -2 C + 0 C + N/A + N/A + N/A + + + N/A + N/A + + + P0 + 27.22 W + 72.00 W + 72.00 W + 72.00 W + 40.00 W + 72.00 W + + + N/A + + + P0 + N/A + N/A + N/A + N/A + N/A + N/A + + + 2040 MHz + 2040 MHz + 6250 MHz + 1770 MHz + + + 2040 MHz + 6251 MHz + + + 2040 MHz + 6251 MHz + + + N/A + + + 2040 MHz + 2040 MHz + 6251 MHz + 1770 MHz + + + 2040 MHz + + + N/A + N/A + + + 885.000 mV + + + N/A + N/A + N/A + N/A + + N/A + + + + + 6251 MHz + 2040 MHz + 2025 MHz + 2010 MHz + 1995 MHz + 1980 MHz + 1965 MHz + 1950 MHz + 1935 MHz + 1920 MHz + 1905 MHz + 1890 MHz + 1875 MHz + 1860 MHz + 1845 MHz + 1830 MHz + 1815 MHz + 1800 MHz + 1785 MHz + 1770 MHz + 1755 MHz + 1740 MHz + 1725 MHz + 1710 MHz + 1695 MHz + 1680 MHz + 1665 MHz + 1650 MHz + 1635 MHz + 1620 MHz + 1605 MHz + 1590 MHz + 1575 MHz + 1560 MHz + 1545 MHz + 1530 MHz + 1515 MHz + 1500 MHz + 1485 MHz + 1470 MHz + 1455 MHz + 1440 MHz + 1425 MHz + 1410 MHz + 1395 MHz + 1380 MHz + 1365 MHz + 1350 MHz + 1335 MHz + 1320 MHz + 1305 MHz + 1290 MHz + 1275 MHz + 1260 MHz + 1245 MHz + 1230 MHz + 1215 MHz + 1200 MHz + 1185 MHz + 1170 MHz + 1155 MHz + 1140 MHz + 1125 MHz + 1110 MHz + 1095 MHz + 1080 MHz + 1065 MHz + 1050 MHz + 1035 MHz + 1020 MHz + 1005 MHz + 990 MHz + 975 MHz + 960 MHz + 945 MHz + 930 MHz + 915 MHz + 900 MHz + 885 MHz + 870 MHz + 855 MHz + 840 MHz + 825 MHz + 810 MHz + 795 MHz + 780 MHz + 765 MHz + 750 MHz + 735 MHz + 720 MHz + 705 MHz + 690 MHz + 675 MHz + 660 MHz + 645 MHz + 630 MHz + 615 MHz + 600 MHz + 585 MHz + 570 MHz + 555 MHz + 540 MHz + 525 MHz + 510 MHz + 495 MHz + 480 MHz + 465 MHz + 450 MHz + 435 MHz + 420 MHz + 405 MHz + 390 MHz + 375 MHz + 360 MHz + 345 MHz + 330 MHz + 315 MHz + 300 MHz + 285 MHz + 270 MHz + 255 MHz + 240 MHz + 225 MHz + 210 MHz + + + 405 MHz + 645 MHz + 630 MHz + 615 MHz + 600 MHz + 585 MHz + 570 MHz + 555 MHz + 540 MHz + 525 MHz + 510 MHz + 495 MHz + 480 MHz + 465 MHz + 450 MHz + 435 MHz + 420 MHz + 405 MHz + 390 MHz + 375 MHz + 360 MHz + 345 MHz + 330 MHz + 315 MHz + 300 MHz + 285 MHz + 270 MHz + 255 MHz + 240 MHz + 225 MHz + 210 MHz + + + + + + + + disabled + + + + + NVIDIA L4 + NVIDIA + Ada Lovelace + Enabled + Disabled + Disabled + None + + N/A + N/A + + + None + + Disabled + 4000 + + N/A + N/A + + 1654523001128 + GPU-128ab6fb-6ec9-fd74-b479-4a5fd14f55bd + 0 + 95.04.29.00.06 + No + 0xc100 + 900-2G193-0000-001 + 27B8-895-A1 + N/A + 1 + + G193.0200.00.01 + 2.1 + 6.16 + N/A + + + N/A + N/A + + + N/A + N/A + + N/A + + None + N/A + N/A + + + No + N/A + + 555.42.06 + + N/A + + + C1 + 00 + 0000 + 3 + 2 + 27B810DE + 00000000:C1:00.0 + 16CA10DE + + + 4 + 4 + 4 + 4 + 5 + + + 16x + 1x + + + + N/A + N/A + + 0 + 0 + 0 KB/s + 0 KB/s + N/A + N/A + + N/A + P0 + + Active + Not Active + Not Active + Not Active + Not Active + Not Active + Not Active + Not Active + Not Active + + N/A + + 23034 MiB + 434 MiB + 1 MiB + 22601 MiB + + + 32768 MiB + 1 MiB + 32767 MiB + + + 0 MiB + 0 MiB + 0 MiB + + Default + + 3 % + 0 % + 0 % + 0 % + 0 % + 0 % + + + 0 + 0 + 0 + + + 0 + 0 + 0 + + + Enabled + Enabled + + + + 0 + 0 + 0 + 0 + 0 + + + 0 + 0 + 0 + 0 + 0 + No + + + 0 + 0 + 0 + 0 + 0 + + + + + N/A + N/A + + + N/A + N/A + + N/A + N/A + + + 0 + 0 + No + No + + 96 bank(s) + 0 bank(s) + 0 bank(s) + 0 bank(s) + 0 bank(s) + + + + 40 C + 43 C + -5 C + -2 C + 0 C + N/A + N/A + N/A + + + N/A + N/A + + + P0 + 29.54 W + 72.00 W + 72.00 W + 72.00 W + 40.00 W + 72.00 W + + + N/A + + + P0 + N/A + N/A + N/A + N/A + N/A + N/A + + + 2040 MHz + 2040 MHz + 6250 MHz + 1770 MHz + + + 2040 MHz + 6251 MHz + + + 2040 MHz + 6251 MHz + + + N/A + + + 2040 MHz + 2040 MHz + 6251 MHz + 1770 MHz + + + 2040 MHz + + + N/A + N/A + + + 910.000 mV + + + N/A + N/A + N/A + N/A + + N/A + + + + + 6251 MHz + 2040 MHz + 2025 MHz + 2010 MHz + 1995 MHz + 1980 MHz + 1965 MHz + 1950 MHz + 1935 MHz + 1920 MHz + 1905 MHz + 1890 MHz + 1875 MHz + 1860 MHz + 1845 MHz + 1830 MHz + 1815 MHz + 1800 MHz + 1785 MHz + 1770 MHz + 1755 MHz + 1740 MHz + 1725 MHz + 1710 MHz + 1695 MHz + 1680 MHz + 1665 MHz + 1650 MHz + 1635 MHz + 1620 MHz + 1605 MHz + 1590 MHz + 1575 MHz + 1560 MHz + 1545 MHz + 1530 MHz + 1515 MHz + 1500 MHz + 1485 MHz + 1470 MHz + 1455 MHz + 1440 MHz + 1425 MHz + 1410 MHz + 1395 MHz + 1380 MHz + 1365 MHz + 1350 MHz + 1335 MHz + 1320 MHz + 1305 MHz + 1290 MHz + 1275 MHz + 1260 MHz + 1245 MHz + 1230 MHz + 1215 MHz + 1200 MHz + 1185 MHz + 1170 MHz + 1155 MHz + 1140 MHz + 1125 MHz + 1110 MHz + 1095 MHz + 1080 MHz + 1065 MHz + 1050 MHz + 1035 MHz + 1020 MHz + 1005 MHz + 990 MHz + 975 MHz + 960 MHz + 945 MHz + 930 MHz + 915 MHz + 900 MHz + 885 MHz + 870 MHz + 855 MHz + 840 MHz + 825 MHz + 810 MHz + 795 MHz + 780 MHz + 765 MHz + 750 MHz + 735 MHz + 720 MHz + 705 MHz + 690 MHz + 675 MHz + 660 MHz + 645 MHz + 630 MHz + 615 MHz + 600 MHz + 585 MHz + 570 MHz + 555 MHz + 540 MHz + 525 MHz + 510 MHz + 495 MHz + 480 MHz + 465 MHz + 450 MHz + 435 MHz + 420 MHz + 405 MHz + 390 MHz + 375 MHz + 360 MHz + 345 MHz + 330 MHz + 315 MHz + 300 MHz + 285 MHz + 270 MHz + 255 MHz + 240 MHz + 225 MHz + 210 MHz + + + 405 MHz + 645 MHz + 630 MHz + 615 MHz + 600 MHz + 585 MHz + 570 MHz + 555 MHz + 540 MHz + 525 MHz + 510 MHz + 495 MHz + 480 MHz + 465 MHz + 450 MHz + 435 MHz + 420 MHz + 405 MHz + 390 MHz + 375 MHz + 360 MHz + 345 MHz + 330 MHz + 315 MHz + 300 MHz + 285 MHz + 270 MHz + 255 MHz + 240 MHz + 225 MHz + 210 MHz + + + + + + + + disabled + + + + \ No newline at end of file diff --git a/psutil/gpu/nvidia/fixtures/data3.xml b/psutil/gpu/nvidia/fixtures/data3.xml new file mode 100644 index 00000000..86d6ec33 --- /dev/null +++ b/psutil/gpu/nvidia/fixtures/data3.xml @@ -0,0 +1,242 @@ + + + + Mon Jul 15 15:24:14 2024 + 440.33.01 + 10.2 + 1 + + GeForce GTX 1080 + GeForce + Disabled + Disabled + Disabled + Disabled + 4000 + + N/A + N/A + + N/A + GPU-bf6e9a3a-e0bb-c253-45b4-34c99ec25512 + 0 + 86.04.17.00.01 + No + 0x100 + N/A + + G001.0000.01.03 + 1.1 + N/A + N/A + + + N/A + N/A + + + None + N/A + + + N/A + + + 01 + 00 + 0000 + 1B8010DE + 00000000:01:00.0 + 119E10DE + + + 3 + 3 + + + 16x + 16x + + + + N/A + N/A + + 0 + 0 + 783000 KB/s + 1269000 KB/s + + 53 % + P2 + + Not Active + Not Active + Not Active + Not Active + Not Active + Not Active + Not Active + Not Active + Not Active + + + 8119 MiB + 2006 MiB + 6113 MiB + + + 256 MiB + 2 MiB + 254 MiB + + Default + + 32 % + 11 % + 17 % + 25 % + + + 0 + 0 + 0 + + + 0 + 0 + 0 + + + N/A + N/A + + + + + N/A + N/A + N/A + N/A + N/A + N/A + N/A + N/A + + + N/A + N/A + N/A + N/A + N/A + N/A + N/A + N/A + + + + + N/A + N/A + N/A + N/A + N/A + N/A + N/A + N/A + + + N/A + N/A + N/A + N/A + N/A + N/A + N/A + N/A + + + + + + N/A + N/A + + + N/A + N/A + + N/A + N/A + + + 65 C + 99 C + 96 C + N/A + N/A + N/A + + + P2 + Supported + 89.74 W + 180.00 W + 180.00 W + 180.00 W + 90.00 W + 180.00 W + + + 1885 MHz + 1885 MHz + 4513 MHz + 1695 MHz + + + N/A + N/A + + + N/A + N/A + + + 1911 MHz + 1911 MHz + 5005 MHz + 1708 MHz + + + N/A + + + N/A + N/A + + N/A + + + 10131 + C + ffmpeg + 389 MiB + + + 13597 + C + ffmpeg + 1054 MiB + + + 16870 + C + ffmpeg + 549 MiB + + + + + + + \ No newline at end of file diff --git a/psutil/gpu/nvidia/nvidia.go b/psutil/gpu/nvidia/nvidia.go new file mode 100644 index 00000000..ba45e2fa --- /dev/null +++ b/psutil/gpu/nvidia/nvidia.go @@ -0,0 +1,284 @@ +package nvidia + +import ( + "bytes" + "context" + "encoding/xml" + "fmt" + "os/exec" + "sync" + "time" + + "github.com/datarhei/core/v16/psutil/gpu" +) + +var Default gpu.GPU + +func init() { + Default = New("") +} + +type Megabytes uint64 + +func (m *Megabytes) UnmarshalText(text []byte) error { + value := uint64(0) + _, err := fmt.Sscanf(string(text), "%d MiB", &value) + if err != nil { + return err + } + + *m = Megabytes(value * 1024 * 1024) + + return nil +} + +type Utilization float64 + +func (u *Utilization) UnmarshalText(text []byte) error { + value := float64(0) + _, err := fmt.Sscanf(string(text), "%f %%", &value) + if err != nil { + return err + } + + *u = Utilization(value) + + return nil +} + +type Process struct { + PID int32 `xml:"pid"` + Memory Megabytes `xml:"used_memory"` +} + +type GPUStats struct { + Name string `xml:"product_name"` + Architecture string `xml:"product_architecture"` + + MemoryTotal Megabytes `xml:"fb_memory_usage>total"` + MemoryUsed Megabytes `xml:"fb_memory_usage>used"` + + Usage Utilization `xml:"utilization>gpu_util"` + MemoryUsage Utilization `xml:"utilization>memory_util"` + EncoderUsage Utilization `xml:"utilization>encoder_util"` + DecoderUsage Utilization `xml:"utilization>decoder_util"` + + Process []Process `xml:"processes>process_info"` +} + +type Stats struct { + GPU []GPUStats `xml:"gpu"` +} + +func parse(data []byte) (Stats, error) { + nv := Stats{} + + err := xml.Unmarshal(data, &nv) + if err != nil { + return nv, fmt.Errorf("parsing report: %w", err) + } + + return nv, nil +} + +type nvidia struct { + cmd *exec.Cmd + wr *writer + + lock sync.RWMutex + cancel context.CancelFunc + stats Stats + process map[int32]Process + err error +} + +type dummy struct{} + +func (d *dummy) Count() (int, error) { return 0, nil } +func (d *dummy) Stats() ([]gpu.Stats, error) { return nil, nil } +func (d *dummy) Process(pid int32) (gpu.Process, error) { return gpu.Process{}, gpu.ErrProcessNotFound } + +type writer struct { + buf bytes.Buffer + ch chan Stats +} + +var terminator = []byte("\n") + +func (w *writer) Write(data []byte) (int, error) { + n, err := w.buf.Write(data) + if err != nil { + return n, err + } + + for { + idx := bytes.Index(w.buf.Bytes(), terminator) + if idx == -1 { + break + } + + content := make([]byte, idx+len(terminator)) + n, err := w.buf.Read(content) + if err != nil || n != len(content) { + break + } + + s, err := parse(content) + if err != nil { + continue + } + + w.ch <- s + } + + return n, nil +} + +func New(path string) gpu.GPU { + if len(path) == 0 { + path = "nvidia-smi" + } + + _, err := exec.LookPath(path) + if err != nil { + return &dummy{} + } + + n := &nvidia{ + wr: &writer{ + ch: make(chan Stats, 1), + }, + process: map[int32]Process{}, + } + + ctx, cancel := context.WithCancel(context.Background()) + n.cancel = cancel + + go n.runner(ctx, path) + go n.reader(ctx) + + return n +} + +func (n *nvidia) reader(ctx context.Context) { + for { + select { + case <-ctx.Done(): + return + case stats := <-n.wr.ch: + n.lock.Lock() + n.stats = stats + n.process = map[int32]Process{} + for _, g := range n.stats.GPU { + for _, p := range g.Process { + n.process[p.PID] = p + } + } + n.lock.Unlock() + } + } +} + +func (n *nvidia) runner(ctx context.Context, path string) { + for { + n.cmd = exec.Command(path, "-q", "-x", "-l", "1") + n.cmd.Stdout = n.wr + err := n.cmd.Start() + if err != nil { + n.lock.Lock() + n.err = err + n.lock.Unlock() + + time.Sleep(3 * time.Second) + continue + } + + err = n.cmd.Wait() + + n.lock.Lock() + n.err = err + n.lock.Unlock() + + select { + case <-ctx.Done(): + return + default: + } + } +} + +func (n *nvidia) Count() (int, error) { + n.lock.RLock() + defer n.lock.RUnlock() + + if n.err != nil { + return 0, n.err + } + + return len(n.stats.GPU), nil +} + +func (n *nvidia) Stats() ([]gpu.Stats, error) { + s := []gpu.Stats{} + + n.lock.RLock() + defer n.lock.RUnlock() + + if n.err != nil { + return s, n.err + } + + for _, nv := range n.stats.GPU { + stats := gpu.Stats{ + Name: nv.Name, + Architecture: nv.Architecture, + MemoryTotal: uint64(nv.MemoryTotal), + MemoryUsed: uint64(nv.MemoryUsed), + Usage: float64(nv.Usage), + MemoryUsage: float64(nv.MemoryUsage), + EncoderUsage: float64(nv.EncoderUsage), + DecoderUsage: float64(nv.DecoderUsage), + Process: []gpu.Process{}, + } + + for _, p := range nv.Process { + stats.Process = append(stats.Process, gpu.Process{ + PID: p.PID, + Memory: uint64(p.Memory), + }) + } + + s = append(s, stats) + } + + return s, nil +} + +func (n *nvidia) Process(pid int32) (gpu.Process, error) { + n.lock.RLock() + defer n.lock.RUnlock() + + p, hasProcess := n.process[pid] + if !hasProcess { + return gpu.Process{}, gpu.ErrProcessNotFound + } + + return gpu.Process{ + PID: p.PID, + Memory: uint64(p.Memory), + }, nil +} + +func (n *nvidia) Close() { + n.lock.Lock() + defer n.lock.Unlock() + + if n.cancel == nil { + return + } + + n.cancel() + n.cancel = nil + + n.cmd.Process.Kill() +} diff --git a/psutil/gpu/nvidia/nvidia_test.go b/psutil/gpu/nvidia/nvidia_test.go new file mode 100644 index 00000000..f18310b2 --- /dev/null +++ b/psutil/gpu/nvidia/nvidia_test.go @@ -0,0 +1,102 @@ +package nvidia + +import ( + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestParseNV(t *testing.T) { + data, err := os.ReadFile("./fixtures/data1.xml") + require.NoError(t, err) + + nv, err := parse(data) + require.NoError(t, err) + + require.Equal(t, Stats{ + GPU: []GPUStats{ + { + Name: "NVIDIA GeForce GTX 1080", + Architecture: "Pascal", + MemoryTotal: 8119 * 1024 * 1024, + MemoryUsed: 918 * 1024 * 1024, + Usage: 15, + MemoryUsage: 7, + EncoderUsage: 3, + DecoderUsage: 0, + Process: []Process{ + { + PID: 18179, + Memory: 916 * 1024 * 1024, + }, + }, + }, + }, + }, nv) + + data, err = os.ReadFile("./fixtures/data2.xml") + require.NoError(t, err) + + nv, err = parse(data) + require.NoError(t, err) + + require.Equal(t, Stats{ + GPU: []GPUStats{ + { + Name: "NVIDIA L4", + Architecture: "Ada Lovelace", + MemoryTotal: 23034 * 1024 * 1024, + MemoryUsed: 1 * 1024 * 1024, + Usage: 2, + MemoryUsage: 0, + EncoderUsage: 0, + DecoderUsage: 0, + }, + { + Name: "NVIDIA L4", + Architecture: "Ada Lovelace", + MemoryTotal: 23034 * 1024 * 1024, + MemoryUsed: 1 * 1024 * 1024, + Usage: 3, + MemoryUsage: 0, + EncoderUsage: 0, + DecoderUsage: 0, + }, + }, + }, nv) + + data, err = os.ReadFile("./fixtures/data3.xml") + require.NoError(t, err) + + nv, err = parse(data) + require.NoError(t, err) + + require.Equal(t, Stats{ + GPU: []GPUStats{ + { + Name: "GeForce GTX 1080", + MemoryTotal: 8119 * 1024 * 1024, + MemoryUsed: 2006 * 1024 * 1024, + Usage: 32, + MemoryUsage: 11, + EncoderUsage: 17, + DecoderUsage: 25, + Process: []Process{ + { + PID: 10131, + Memory: 389 * 1024 * 1024, + }, + { + PID: 13597, + Memory: 1054 * 1024 * 1024, + }, + { + PID: 16870, + Memory: 549 * 1024 * 1024, + }, + }, + }, + }, + }, nv) +} diff --git a/psutil/psutil.go b/psutil/psutil.go index f6b95934..73c47a67 100644 --- a/psutil/psutil.go +++ b/psutil/psutil.go @@ -13,6 +13,8 @@ import ( "sync" "time" + "github.com/datarhei/core/v16/psutil/gpu/nvidia" + "github.com/shirou/gopsutil/v3/cpu" "github.com/shirou/gopsutil/v3/disk" "github.com/shirou/gopsutil/v3/mem" @@ -58,6 +60,18 @@ type CPUInfoStat struct { Other float64 // percent 0-100 } +type GPUInfoStat struct { + Name string + + MemoryTotal uint64 // bytes + MemoryUsed uint64 // bytes + + Usage float64 // percent 0-100 + MemoryUsage float64 // percent 0-100 + EncoderUsage float64 // percent 0-100 + DecoderUsage float64 // percent 0-100 +} + type cpuTimesStat struct { total float64 // seconds system float64 // seconds @@ -73,12 +87,16 @@ type Util interface { // CPUCounts returns the number of cores, either logical or physical. CPUCounts(logical bool) (float64, error) + // GPUCounts returns the number of GPU cores. + GPUCounts() (float64, error) + // CPUPercent returns the current CPU load in percent. The values range // from 0 to 100, independently of the number of logical cores. CPUPercent() (*CPUInfoStat, error) DiskUsage(path string) (*disk.UsageStat, error) VirtualMemory() (*MemoryInfoStat, error) NetIOCounters(pernic bool) ([]net.IOCountersStat, error) + GPUStats() ([]GPUInfoStat, error) // Process returns a process observer for a process with the given pid. Process(pid int32) (Process, error) @@ -282,6 +300,16 @@ func CPUCounts(logical bool) (float64, error) { return DefaultUtil.CPUCounts(logical) } +func (u *util) GPUCounts() (float64, error) { + count, err := nvidia.Default.Count() + + return float64(count), err +} + +func GPUCounts() (float64, error) { + return DefaultUtil.GPUCounts() +} + // cpuTimes returns the current cpu usage times in seconds. func (u *util) cpuTimes() (*cpuTimesStat, error) { if u.hasCgroup && u.cpuLimit > 0 { @@ -534,3 +562,30 @@ func cpuTotal(c *cpu.TimesStat) float64 { return c.User + c.System + c.Idle + c.Nice + c.Iowait + c.Irq + c.Softirq + c.Steal + c.Guest + c.GuestNice } + +func (u *util) GPUStats() ([]GPUInfoStat, error) { + nvstats, err := nvidia.Default.Stats() + if err != nil { + return nil, err + } + + stats := []GPUInfoStat{} + + for _, nv := range nvstats { + stats = append(stats, GPUInfoStat{ + Name: nv.Name, + MemoryTotal: nv.MemoryTotal, + MemoryUsed: nv.MemoryUsed, + Usage: nv.Usage, + MemoryUsage: nv.MemoryUsage, + EncoderUsage: nv.EncoderUsage, + DecoderUsage: nv.DecoderUsage, + }) + } + + return stats, nil +} + +func GPUStats() ([]GPUInfoStat, error) { + return DefaultUtil.GPUStats() +} diff --git a/resources/resources_test.go b/resources/resources_test.go index 0158c7f7..3d26c40c 100644 --- a/resources/resources_test.go +++ b/resources/resources_test.go @@ -21,6 +21,10 @@ func (u *util) CPUCounts(logical bool) (float64, error) { return 2, nil } +func (u *util) GPUCounts() (float64, error) { + return 0, nil +} + func (u *util) CPUPercent() (*psutil.CPUInfoStat, error) { return &psutil.CPUInfoStat{ System: 10, @@ -46,6 +50,10 @@ func (u *util) NetIOCounters(pernic bool) ([]net.IOCountersStat, error) { return nil, nil } +func (u *util) GPUStats() ([]psutil.GPUInfoStat, error) { + return nil, nil +} + func (u *util) Process(pid int32) (psutil.Process, error) { return nil, nil }