上一次讲到内存监控的几个点,这里来分析当发生内存问题时 dump 的时机及方式。
dump的时机
1.activity(fragment)生命周期结束
在 activity 调用 onDestroy 之后,可以认为该 activity 及与之相关的东西都没用了,应该被回收。所以可以在调用 onDestroy 之后将 activity 放入一个弱引用中,隔一段时间再查看弱引用中的 activity 是否还存在,以此判断 activity 是否已被系统回收。
注意点
- 隔一段时间是因为当调用 Runtime.getRuntime().gc(); System.runFinalization(); 之类的函数,系统也不会立即就去执行回收操作,还是要看系统何时去执行 GC 操作。
- 判断弱引用内的 obj 是否被回收有两种方法:一是判断 WeakReference.get() == null;二是在构造 WeakReference 时传入一个 ReferenceQueue 队列,当弱引用内的 obj 被回收时,该弱引用会被放入队列,从队列里能取出它就说明 obj 已被回收。
// Hand every destroyed activity to the leak checker so it can verify later
// that the instance really became unreachable.
application.registerActivityLifecycleCallbacks(new Application.ActivityLifecycleCallbacks() {
    ...
    @Override
    public void onActivityDestroyed(@NonNull Activity activity) {
        CheckLeakMomory.getInstance().addMoniter(activity);
    }
});
/**
 * Watches destroyed activities through weak references and triggers a heap dump
 * when one of them is still reachable after several GC rounds.
 *
 * <p>{@link #addMoniter(Activity)} may be called from any thread; the periodic
 * check ({@link #run()}) executes on a dedicated {@link HandlerThread}. All access
 * to the monitored list and to the scheduling flag is guarded by {@code lock}.
 */
public class CheckLeakMomory implements Runnable {
    private static final String TAG = "lyll";
    /** Number of checks an activity may survive before it is reported as leaked. */
    private static final int MAX_GC_TIMES = 3;

    private static volatile CheckLeakMomory instance;

    /** Delay between two checks, in milliseconds. */
    private final long gcTime = 5_000;
    private final Object lock = new Object();
    private final List<ActivityInfo> activityList = new ArrayList<>();
    private final ReferenceQueue<Activity> queue = new ReferenceQueue<>();
    private final HandlerThread handlerThread;
    private final Handler handler;
    /** True while a check is pending or running; guarded by {@code lock}. */
    private boolean scheduled;

    private CheckLeakMomory() {
        handlerThread = new HandlerThread("CheckLeakMomory");
        handlerThread.start();
        handler = new Handler(handlerThread.getLooper());
    }

    /** Returns the process-wide instance, creating it on first use. */
    public static CheckLeakMomory getInstance() {
        if (instance == null) {
            synchronized (CheckLeakMomory.class) {
                if (instance == null) {
                    instance = new CheckLeakMomory();
                }
            }
        }
        return instance;
    }

    /**
     * Starts monitoring a destroyed activity.
     *
     * <p>The check is scheduled on every API level. A private flag replaces
     * {@code Handler.hasCallbacks(Runnable)}, which only exists on API 29+ and
     * previously meant that nothing was ever scheduled on older devices.
     *
     * @param activity the activity whose {@code onDestroy} has run; ignored when null
     */
    public void addMoniter(Activity activity) {
        if (activity == null) {
            return;
        }
        ActivityInfo info = new ActivityInfo(new WeakReference<>(activity, queue));
        synchronized (lock) {
            activityList.add(0, info);
            if (!scheduled) {
                scheduled = true;
                handler.postDelayed(this, gcTime);
            }
        }
    }

    /**
     * One check round: request a GC, drop entries that were collected, and dump the
     * heap if an activity has outlived {@link #MAX_GC_TIMES} checks. When a leak is
     * found the whole list is cleared, because the single heap dump already contains
     * every object that is still alive.
     */
    @Override
    public void run() {
        runGC();
        drainQueue();

        Activity leaked = null;
        boolean hasNext = false;
        synchronized (lock) {
            java.util.Iterator<ActivityInfo> iterator = activityList.iterator();
            while (iterator.hasNext()) {
                ActivityInfo info = iterator.next();
                // Take the strong reference once so the activity cannot disappear
                // between the leak decision and the dump.
                Activity activity = info.getActivity();
                if (activity == null) {
                    iterator.remove();
                    continue;
                }
                if (info.isLeak()) {
                    leaked = activity;
                    break;
                }
                hasNext |= info.waitNextGC();
            }
            // Decide about rescheduling while still holding the lock so that a
            // concurrent addMoniter() can never be lost.
            if (leaked != null) {
                clear();
            } else if (hasNext) {
                handler.postDelayed(this, gcTime);
            } else {
                scheduled = false;
            }
        }

        if (leaked != null) {
            dumpMemory(leaked);
        } else if (hasNext) {
            // Extra GC requests outside the lock, so callers of addMoniter() are
            // not blocked while finalizers run.
            runGC();
            runGC();
            runGC();
        }
    }

    /** Requests a garbage collection; the runtime decides when it actually happens. */
    private void runGC() {
        Runtime.getRuntime().gc();
        System.runFinalization();
    }

    /** Releases the weak references that the GC has already enqueued. */
    private void drainQueue() {
        while (queue.poll() != null) {
            // Nothing to do: polling removes the reference from the queue.
        }
    }

    /** Forgets all monitored activities and cancels the pending check. Caller holds {@code lock}. */
    private void clear() {
        activityList.clear();
        handler.removeCallbacks(this);
        scheduled = false;
    }

    /** Logs the leaked activity and asks {@code HeapAnalyzer} to dump and analyse the heap. */
    private void dumpMemory(Activity activity) {
        if (activity == null) {
            return;
        }
        String name = activity.getClass().getSimpleName();
        Log.e(TAG, "发现内存蟹肉 " + name);
        try {
            String path = activity.getFilesDir().getAbsolutePath();
            HeapAnalyzer.getInstance().dumpAndAnalysisLeak(path, name);
        } catch (Exception e) {
            Log.e(TAG, "heap dump failed for " + name, e);
        }
    }

    /** Bookkeeping for one monitored activity: the weak reference plus the number of checks so far. */
    static class ActivityInfo {
        private final WeakReference<Activity> activityWeakReference;
        private int gcTimes = 0;

        public ActivityInfo(WeakReference<Activity> activityWeakReference) {
            this.activityWeakReference = activityWeakReference;
        }

        /** Returns true while the entry has not yet used up its allowed checks. */
        public boolean waitNextGC() {
            return gcTimes <= MAX_GC_TIMES;
        }

        /**
         * Counts one more check and reports whether the activity is still alive
         * after more than {@link #MAX_GC_TIMES} checks.
         */
        public boolean isLeak() {
            gcTimes++;
            return activityWeakReference.get() != null && gcTimes > MAX_GC_TIMES;
        }

        /** Returns the activity, or null once it has been collected. */
        public Activity getActivity() {
            return activityWeakReference.get();
        }
    }
}
这里判断 activity 是否被回收判断了 3 次:之前只判断一次时,极个别情况下本可以被正常回收的 activity 在一次延时内还没有被回收,会造成误报,所以改为判断三次。
2.内存阈值的判断
每隔一段时间去轮询判断当前进程的内存使用比,线程数,打开文件数来判断当前进程的内存情况,优点是可以统计除 activity 和 fragment 外的内存使用,目前也没有很多限制。
- 在 Linux 中,进程可以创建的线程数和可以打开的文件数都是有上限的,其中单进程可打开文件数的默认软限制常见为 1024 个(具体取值取决于系统配置)。在 Android 中同样有限制,可以通过下面的命令查看。
# Show the system-wide maximum number of threads
adb shell cat /proc/sys/kernel/threads-max
# Show the resource limits, including the maximum number of open files
adb shell ulimit -a
# Show the limits of one process; replace <pid> with the target process id
adb shell cat /proc/<pid>/limits
当线程数或打开的文件数超过上限时,就会发生 OOM。
代码参考自 KOOM
/**
 * Polls {@link SystemInfo} on a background {@link HandlerThread} and triggers a
 * heap dump when the Java heap ratio, thread count or open-fd count stays above
 * its threshold for enough consecutive polls, or when the heap grows very fast.
 *
 * <p>Each tracker keeps its own consecutive-hit counter. A shared counter would be
 * reset by whichever tracker is currently below its threshold, so no tracker could
 * ever reach its limit.
 */
public class KOOMMoniter extends Moniter implements Runnable {
    private static final String TAG = "KOOMMoniter";
    /** Poll interval in milliseconds. */
    private static final int CHECK_INTERVAL = 10_000;
    private static final float HEAP_RATIO_THRESHOLD_GAP = 0.05f;
    /** Allowed drop of the thread count between polls while still counting as "rising". */
    private static final int THREAD_COUNT_THRESHOLD_GAP = 50;
    /** Allowed drop of the fd count between polls while still counting as "rising". */
    private static final int FD_COUNT_THRESHOLD_GAP = 50;
    private static final String REASON_HIGH_WATERMARK = "high_watermark";
    private static final String REASON_HUGE_DELTA = "delta";
    private static final float BYTES_PER_MB = 1024f * 1024f;

    private final Application application;
    private final ILog logImpl;
    private final HandlerThread handlerThread;
    private final Handler handler;

    // heap
    private float heapThreshold = 0.8f;
    private float mLastHeapRatio;
    private int heapOverThresholdCount;
    private int heapMaxOverThresholdCount = 3;

    // thread
    // NOTE(review): 600 consecutive polls at a 10 s interval is about 100 minutes;
    // confirm that threadThreshold / threadMaxOverThresholdCount are not swapped.
    private int threadThreshold = 200;
    private int mLastThreadCount;
    private int threadOverThresholdCount;
    private int threadMaxOverThresholdCount = 600;

    // fd
    // NOTE(review): 500 consecutive polls is about 83 minutes; confirm intended.
    private int fdThreshold = 1000;
    private int mLastFdCount;
    private int fdOverThresholdCount;
    private int fdMaxOverThresholdCount = 500;

    // physical memory
    private float deviceMemoryThreshold = 0.05f;

    // fast / huge allocation
    private float forceDumpJavaHeapMaxThreshold = 0.90f;
    private String mDumpReason = "";
    /** Heap growth between two polls that forces a dump, in KB. */
    private int forceDumpJavaHeapDeltaThreshold = 350_000;

    /**
     * @param app     application used to locate the directory for dump files
     * @param logImpl logger used for the "[meet condition]" messages
     */
    public KOOMMoniter(Application app, ILog logImpl) {
        application = app;
        this.logImpl = logImpl;
        handlerThread = new HandlerThread("KOOMMoniter");
        handlerThread.start();
        handler = new Handler(handlerThread.getLooper());
    }

    /** Schedules the first poll; a second call does not queue a duplicate callback. */
    @Override
    public void start() {
        handler.removeCallbacks(this);
        handler.postDelayed(this, CHECK_INTERVAL);
    }

    /** Cancels the pending poll. */
    @Override
    public void stop() {
        handler.removeCallbacks(this);
    }

    @Override
    public void report() {
    }

    /**
     * One poll: refresh the system snapshot, evaluate every tracker and dump if any
     * of them fires. All trackers are evaluated on every poll (no short-circuit) so
     * that each keeps its "last value" state current.
     */
    @Override
    public void run() {
        SystemInfo.refresh();
        boolean heap = isHeapOOMTracker();
        boolean thread = isThreadOOMTracker();
        boolean fd = isFDOOMTracker();
        boolean physical = isPhysicalMemoryOOMTracker();
        boolean fastHuge = isFastHugeMemoryOOMTracker();
        if (heap || thread || fd || physical || fastHuge) {
            dump();
            // Start counting from scratch so a condition that persists has to
            // accumulate again instead of dumping on every single poll.
            resetHeap();
            resetThreadTracker();
            resetFDTracker();
        }
        handler.postDelayed(this, CHECK_INTERVAL);
    }

    /** Fires immediately on a very high heap ratio or a very large growth since the last poll. */
    private boolean isFastHugeMemoryOOMTracker() {
        SystemInfo.JavaHeap javaHeap = SystemInfo.javaHeap;
        // High watermark: dump right away.
        if (javaHeap.rate > forceDumpJavaHeapMaxThreshold) {
            mDumpReason = REASON_HIGH_WATERMARK;
            logImpl.Logi(TAG, "[meet condition] fast huge memory allocated detected, " +
                    "high memory watermark, force dump analysis!");
            return true;
        }
        // Huge delta: dump right away. lastJavaHeap.max == 0 means no previous sample.
        SystemInfo.JavaHeap lastJavaHeap = SystemInfo.lastJavaHeap;
        long deltaBytes = javaHeap.used - lastJavaHeap.used;
        if (lastJavaHeap.max != 0L && deltaBytes > forceDumpJavaHeapDeltaThreshold * 1024L) {
            mDumpReason = REASON_HUGE_DELTA;
            logImpl.Logi(TAG, "[meet condition] fast huge memory allocated detected, " +
                    "over the delta threshold!");
            return true;
        }
        return false;
    }

    /** Logs how scarce device memory is; never triggers a dump for now. */
    private boolean isPhysicalMemoryOOMTracker() {
        SystemInfo.MemInfo info = SystemInfo.memInfo;
        if (info.rate < deviceMemoryThreshold) {
            Log.i(TAG, "oom meminfo.rate < " + (deviceMemoryThreshold * 100) + "%");
            // Only reported for now; returning true here would trigger a real dump.
        } else if (info.rate < 0.10f) {
            Log.i(TAG, "oom meminfo.rate < 10.0%");
        } else if (info.rate < 0.15f) {
            Log.i(TAG, "oom meminfo.rate < 15.0%");
        } else if (info.rate < 0.20f) {
            Log.i(TAG, "oom meminfo.rate < 20.0%");
        } else if (info.rate < 0.30f) {
            Log.i(TAG, "oom meminfo.rate < 30.0%");
        }
        return false;
    }

    /** Counts consecutive polls in which the number of open fds is above the threshold and not falling. */
    private boolean isFDOOMTracker() {
        File[] fds = null;
        try {
            fds = new File("/proc/self/fd").listFiles();
        } catch (SecurityException e) {
            Log.w(TAG, "cannot list /proc/self/fd", e);
        }
        if (fds == null) {
            // Directory not readable: keep the current state untouched.
            return fdOverThresholdCount >= fdMaxOverThresholdCount;
        }
        int fdCount = fds.length;
        if (fdCount > fdThreshold && fdCount >= mLastFdCount - FD_COUNT_THRESHOLD_GAP) {
            fdOverThresholdCount++;
            logImpl.Logi(TAG,
                    "[meet condition] "
                            + "overThresholdCount: " + fdOverThresholdCount
                            + ", fdCount: " + fdCount);
        } else {
            resetFDTracker();
        }
        mLastFdCount = fdCount;
        return fdOverThresholdCount >= fdMaxOverThresholdCount;
    }

    private void resetFDTracker() {
        mLastFdCount = 0;
        fdOverThresholdCount = 0;
    }

    /** Counts consecutive polls in which the thread count is above the threshold and not falling. */
    private boolean isThreadOOMTracker() {
        int threadCount = SystemInfo.procStatus.thread;
        if (threadCount > threadThreshold
                && threadCount >= mLastThreadCount - THREAD_COUNT_THRESHOLD_GAP) {
            threadOverThresholdCount++;
            logImpl.Logi(TAG,
                    "[meet condition] "
                            + "overThresholdCount:" + threadOverThresholdCount
                            + ", threadCount: " + threadCount);
        } else {
            resetThreadTracker();
        }
        mLastThreadCount = threadCount;
        return threadOverThresholdCount >= threadMaxOverThresholdCount;
    }

    private void resetThreadTracker() {
        mLastThreadCount = 0;
        threadOverThresholdCount = 0;
    }

    /** Counts consecutive polls in which the Java heap ratio is above the threshold and not falling. */
    private boolean isHeapOOMTracker() {
        float heapRatio = SystemInfo.javaHeap.rate;
        if (heapRatio > heapThreshold && heapRatio >= mLastHeapRatio - HEAP_RATIO_THRESHOLD_GAP) {
            heapOverThresholdCount++;
            logImpl.Logi(TAG,
                    "[meet condition] "
                            + "overThresholdCount: " + heapOverThresholdCount
                            + ", heapRatio: " + heapRatio
                            + ", usedMem: " + (SystemInfo.javaHeap.used / BYTES_PER_MB) + "mb"
                            + ", max: " + (SystemInfo.javaHeap.max / BYTES_PER_MB) + "mb");
        } else {
            resetHeap();
        }
        mLastHeapRatio = heapRatio;
        return heapOverThresholdCount >= heapMaxOverThresholdCount;
    }

    private void resetHeap() {
        mLastHeapRatio = 0.0f;
        heapOverThresholdCount = 0;
    }

    /** Dumps the heap into a timestamped .hprof file inside the app's files directory. */
    private void dump() {
        Calendar calendar = Calendar.getInstance();
        // HH (24-hour clock) keeps morning and afternoon dumps from sharing a file
        // name; a fixed locale keeps the digits ASCII on every device.
        SimpleDateFormat dateFormat =
                new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss", java.util.Locale.US);
        String fileName = dateFormat.format(calendar.getTime()) + ".hprof";
        File heapDumpFile = new File(application.getFilesDir(), fileName);
        ForkJvmHeapDumper.getInstance().dumpAndAnalysisc(heapDumpFile.getAbsolutePath());
    }
}
/**
 * Snapshot of process and device memory figures, refreshed on demand.
 *
 * <p>{@link #refresh()} moves the current snapshot into the {@code last*} fields
 * and fills new ones from {@link Runtime}, {@code /proc/self/status} and
 * {@code /proc/meminfo}.
 */
public class SystemInfo {
    private static final String TAG = "SystemInfo";

    static ProcStatus procStatus = new ProcStatus();
    static ProcStatus lastProcStatus = new ProcStatus();
    static MemInfo memInfo = new MemInfo();
    static MemInfo lastMemInfo = new MemInfo();
    static JavaHeap javaHeap = new JavaHeap();
    static JavaHeap lastJavaHeap = new JavaHeap();
    // Zone info is disabled for now because of SELinux permission problems.
    //var dmaZoneInfo: ZoneInfo = ZoneInfo()
    //var normalZoneInfo: ZoneInfo = ZoneInfo()

    /** Takes a new snapshot, keeping the previous one in the {@code last*} fields. */
    public static void refresh() {
        lastJavaHeap = javaHeap;
        lastMemInfo = memInfo;
        lastProcStatus = procStatus;
        javaHeap = new JavaHeap();
        procStatus = new ProcStatus();
        memInfo = new MemInfo();

        Runtime runtime = Runtime.getRuntime();
        javaHeap.max = runtime.maxMemory();
        javaHeap.total = runtime.totalMemory();
        javaHeap.free = runtime.freeMemory();
        javaHeap.used = javaHeap.total - javaHeap.free;
        javaHeap.rate = 1.0f * javaHeap.used / javaHeap.max;

        readProcStatus(procStatus);
        readMemInfo(memInfo);
        // If MemTotal could not be read this is 0/0, i.e. NaN, which compares false
        // against every threshold.
        memInfo.rate = 1.0f * memInfo.availableInKb / memInfo.totalInKb;

        Log.i(TAG, "----OOM Monitor Memory----");
        Log.i(TAG, "[java] max:" + javaHeap.max + " used ratio:" + (javaHeap.rate * 100) + "%");
        Log.i(TAG, "[proc] VmSize:" + procStatus.vssInKb + "kB VmRss:" + procStatus.rssInKb + "kB " + "Threads:" + procStatus.thread);
        Log.i(TAG, "[meminfo] MemTotal:" + memInfo.totalInKb + "kB MemFree:" + memInfo.freeInKb + "kB " + "MemAvailable:" + memInfo.availableInKb + "kB");
        Log.i(TAG, "avaliable ratio:" + (memInfo.rate * 100) + "% CmaTotal:" + memInfo.cmaTotal + "kB ION_heap:" + memInfo.IONHeap + "kB");
    }

    /** Fills {@code status} from /proc/self/status, stopping once all three values are known. */
    private static void readProcStatus(ProcStatus status) {
        File processFile = new File("/proc/self/status");
        // try-with-resources: this runs on every poll, so an unclosed reader would
        // leak one file descriptor per poll until the GC finalizes it.
        try (BufferedReader br = new BufferedReader(new FileReader(processFile))) {
            String line;
            while ((line = br.readLine()) != null) {
                if (status.vssInKb != 0 && status.rssInKb != 0 && status.thread != 0) {
                    break;
                }
                if (line.startsWith("VmSize")) {
                    status.vssInKb = parseValue(line, "VmSize:");
                } else if (line.startsWith("VmRSS")) {
                    status.rssInKb = parseValue(line, "VmRSS:");
                } else if (line.startsWith("Threads")) {
                    status.thread = parseValue(line, "Threads:");
                }
            }
        } catch (Exception e) {
            Log.w(TAG, "failed to read " + processFile, e);
        }
    }

    /**
     * Fills {@code info} from /proc/meminfo. The whole file is scanned: the
     * CmaTotal / ION_heap lines are not guaranteed to exist, so there is no
     * reliable "all fields seen" condition for an early exit.
     */
    private static void readMemInfo(MemInfo info) {
        File memFile = new File("/proc/meminfo");
        try (BufferedReader br = new BufferedReader(new FileReader(memFile))) {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.startsWith("MemTotal")) {
                    info.totalInKb = parseValue(line, "MemTotal:");
                } else if (line.startsWith("MemFree")) {
                    info.freeInKb = parseValue(line, "MemFree:");
                } else if (line.startsWith("MemAvailable")) {
                    info.availableInKb = parseValue(line, "MemAvailable:");
                } else if (line.startsWith("CmaTotal")) {
                    info.cmaTotal = parseValue(line, "CmaTotal:");
                } else if (line.startsWith("ION_heap")) {
                    info.IONHeap = parseValue(line, "ION_heap:");
                }
            }
        } catch (Exception e) {
            Log.w(TAG, "failed to read " + memFile, e);
        }
    }

    /** Extracts the integer from a "Key:   12345 kB" style line by stripping spaces, the key and the unit. */
    private static int parseValue(String line, String key) {
        return Integer.parseInt(
                line.replace(" ", "").replace(key, "").replace("kB", "").trim());
    }

    /** Values taken from /proc/self/status. */
    static class ProcStatus {
        int thread;
        int vssInKb;
        int rssInKb;
    }

    /** Values taken from /proc/meminfo; {@code rate} is availableInKb / totalInKb. */
    static class MemInfo {
        int totalInKb;
        int freeInKb;
        int availableInKb;
        int IONHeap;
        int cmaTotal;
        float rate;
    }

    /** Java heap figures from {@link Runtime}; {@code rate} is used / max. */
    static class JavaHeap {
        long max;
        long total;
        long free;
        long used;
        float rate;
    }
}
dump的方式
1.开线程dump
优点:简单
缺点:在 dump 过程中会冻住进程,使其无法工作。
// Dump on a worker thread so the calling thread is not blocked by the call itself.
new Thread(new Runnable() {
    @Override
    public void run() {
        try {
            // dumpHprofData declares a checked IOException, which must be handled
            // here because Runnable.run() cannot throw it.
            Debug.dumpHprofData(path);
        } catch (java.io.IOException e) {
            Log.e("HeapDump", "dumpHprofData failed: " + path, e);
        }
    }
}, "HeapDump").start();
2.开子进程dump
利用 linux 的 Copy-On-Write 机制,fork 的进程与父进程有相同的内存,并且 fork 的时间也很短,在子进程中执行 dump 过程中不会影响到父进程的运行。优缺点与上面相反。
伪代码如下
1.在 fork 前挂起所有的线程(native 层)
// art::Dbg::SuspendVM
// Function pointer for the suspend routine, to be called before fork().
// Its address is meant to be looked up in libart.so (dlopen/dlsym, see below).
void (*suspend_vm_fnc_)();
2.fork (native 层)
// fork() returns 0 in the child, the child's pid in the parent, and a negative
// value on failure.
pid = fork();
if(pid == 0){
// Child process: do the work (the heap dump) here.
}else if(pid > 0){
// Parent process.
}else{
// fork failed.
}
3.fork 后恢复 (native 层)
// art::Dbg::ResumeVM
// Function pointer for the resume routine, to be called after fork().
// Its address is meant to be looked up in libart.so (dlopen/dlsym, see below).
void (*resume_vm_fnc_)();
4.子进程 dump (在 java 层),dump完成后退出进程(native 层)
// Child process, Java layer: write the heap dump to `path`.
Debug.dumpHprofData(path);
// Child process, native layer: terminate once the dump is written.
// NOTE(review): in a forked child _exit() is usually preferred over exit() - confirm.
exit(0);
5.父进程等待子进程退出(native 层)
// Parent process: block until the child `pid` has exited; its exit status is stored in `status`.
waitpid(pid, &status, 0) ;
6.父进程拿到 dump 后的 hprof 文件
这里的代码涉及到查找 libart.so 中的挂起和恢复函数,目前没有足够的能力完整写出来,但大致的步骤是:
1.通过 dlopen() 打开加载 libart.so ,得到 so 的句柄 handle
2.通过 dlsym() 查找挂起和恢复函数的地址,(根据动态链接库操作句柄与符号,返回符号对应的地址)(每个 android 版本的 libart.so 中的挂起和恢复函数的符号会不一样,这里需要兼容版本,readelf xxx.so 可查看函数符号)