2019-12-21 lowmemkiller

lowmemkill 涉及3个部分

app测

生命周期相关，细分有22个级别的app状态

 /** @hide Not a real process state. */
    public static final int PROCESS_STATE_UNKNOWN = -1;

    /** @hide Process is a persistent system process. */
    public static final int PROCESS_STATE_PERSISTENT = 0;

    /** @hide Process is a persistent system process and is doing UI. */
    public static final int PROCESS_STATE_PERSISTENT_UI = 1;

    /** @hide Process is hosting the current top activities.  Note that this covers
     * all activities that are visible to the user. */
    @UnsupportedAppUsage
    public static final int PROCESS_STATE_TOP = 2;

    /** @hide Process is hosting a foreground service with location type. */
    public static final int PROCESS_STATE_FOREGROUND_SERVICE_LOCATION = 3;

    /** @hide Process is bound to a TOP app. This is ranked below SERVICE_LOCATION so that
     * it doesn't get the capability of location access while-in-use. */
    public static final int PROCESS_STATE_BOUND_TOP = 4;

    /** @hide Process is hosting a foreground service. */
    @UnsupportedAppUsage
    public static final int PROCESS_STATE_FOREGROUND_SERVICE = 5;

    /** @hide Process is hosting a foreground service due to a system binding. */
    @UnsupportedAppUsage
    public static final int PROCESS_STATE_BOUND_FOREGROUND_SERVICE = 6;

    /** @hide Process is important to the user, and something they are aware of. */
    public static final int PROCESS_STATE_IMPORTANT_FOREGROUND = 7;

    /** @hide Process is important to the user, but not something they are aware of. */
    @UnsupportedAppUsage
    public static final int PROCESS_STATE_IMPORTANT_BACKGROUND = 8;

    /** @hide Process is in the background transient so we will try to keep running. */
    public static final int PROCESS_STATE_TRANSIENT_BACKGROUND = 9;

    /** @hide Process is in the background running a backup/restore operation. */
    public static final int PROCESS_STATE_BACKUP = 10;

    /** @hide Process is in the background running a service.  Unlike oom_adj, this level
     * is used for both the normal running in background state and the executing
     * operations state. */
    @UnsupportedAppUsage
    public static final int PROCESS_STATE_SERVICE = 11;

    /** @hide Process is in the background running a receiver.   Note that from the
     * perspective of oom_adj, receivers run at a higher foreground level, but for our
     * prioritization here that is not necessary and putting them below services means
     * many fewer changes in some process states as they receive broadcasts. */
    @UnsupportedAppUsage
    public static final int PROCESS_STATE_RECEIVER = 12;

    /** @hide Same as {@link #PROCESS_STATE_TOP} but while device is sleeping. */
    public static final int PROCESS_STATE_TOP_SLEEPING = 13;

    /** @hide Process is in the background, but it can't restore its state so we want
     * to try to avoid killing it. */
    public static final int PROCESS_STATE_HEAVY_WEIGHT = 14;

    /** @hide Process is in the background but hosts the home activity. */
    @UnsupportedAppUsage
    public static final int PROCESS_STATE_HOME = 15;

    /** @hide Process is in the background but hosts the last shown activity. */
    public static final int PROCESS_STATE_LAST_ACTIVITY = 16;

    /** @hide Process is being cached for later use and contains activities. */
    @UnsupportedAppUsage
    public static final int PROCESS_STATE_CACHED_ACTIVITY = 17;

    /** @hide Process is being cached for later use and is a client of another cached
     * process that contains activities. */
    public static final int PROCESS_STATE_CACHED_ACTIVITY_CLIENT = 18;

    /** @hide Process is being cached for later use and has an activity that corresponds
     * to an existing recent task. */
    public static final int PROCESS_STATE_CACHED_RECENT = 19;

    /** @hide Process is being cached for later use and is empty. */
    public static final int PROCESS_STATE_CACHED_EMPTY = 20;

    /** @hide Process does not exist. */
    public static final int PROCESS_STATE_NONEXISTENT = 21;

上面这些状态对应的app的adj也是从小到大，adj越高进程越容易被杀
参考网上的一个图

system测

根据app的生命周期调用updateOomAdjLocked更新adj值

image.png

AMS调用updateOomAdjLocked地方异常多，基本上手机只要运行就在调节adj的值
核心函数 computeOomAdjLocked 计算之后调用applyOomAdjLocked之后调用ProcessList.setOomAdj(app.pid, app.uid, app.curAdj);设置adj的值

@GuardedBy("mService")
    private final boolean applyOomAdjLocked(ProcessRecord app, boolean doingAll, long now,
            long nowElapsed) {
    ....
     if (app.curAdj != app.setAdj) {
            ProcessList.setOomAdj(app.pid, app.uid, app.curAdj);
   ....

实现方式是通过socket与lmkd通信

public static void setOomAdj(int pid, int uid, int amt) {
        // This indicates that the process is not started yet and so no need to proceed further.
        if (pid <= 0) {
            return;
        }
        if (amt == UNKNOWN_ADJ)
            return;

        long start = SystemClock.elapsedRealtime();
        ByteBuffer buf = ByteBuffer.allocate(4 * 4);
        buf.putInt(LMK_PROCPRIO);
        buf.putInt(pid);
        buf.putInt(uid);
        buf.putInt(amt);
        writeLmkd(buf, null);
        long now = SystemClock.elapsedRealtime();
        if ((now-start) > 250) {
            Slog.w("ActivityManager", "SLOW OOM ADJ: " + (now-start) + "ms for pid " + pid
                    + " = " + amt);
        }
    }

lmkd实现

static void ctrl_command_handler(int dsock_idx) {
  ...
  switch(cmd) {
    case LMK_TARGET:
        targets = nargs / 2;
        if (nargs & 0x1 || targets > (int)ARRAY_SIZE(lowmem_adj))
            goto wronglen;
        cmd_target(targets, packet);
        break;
    case LMK_PROCPRIO:
        if (nargs != 3)
            goto wronglen;
        cmd_procprio(packet);
        break;

最终通过cmd_procprio写路径的adj

static void cmd_procprio(LMKD_CTRL_PACKET packet) {
    struct proc *procp;
    char path[80];
    char val[20];
    int soft_limit_mult;
    struct lmk_procprio params;
    bool is_system_server;
    struct passwd *pwdrec;

    lmkd_pack_get_procprio(packet, &params);

    if (params.oomadj < OOM_SCORE_ADJ_MIN ||
        params.oomadj > OOM_SCORE_ADJ_MAX) {
        ALOGE("Invalid PROCPRIO oomadj argument %d", params.oomadj);
        return;
    }

    /* gid containing AID_READPROC required */
    /* CAP_SYS_RESOURCE required */
    /* CAP_DAC_OVERRIDE required */
    snprintf(path, sizeof(path), "/proc/%d/oom_score_adj", params.pid);
    snprintf(val, sizeof(val), "%d", params.oomadj);
    if (!writefilestring(path, val, false)) {
        ALOGW("Failed to open %s; errno=%d: process %d might have been killed",
              path, errno, params.pid);
        /* If this file does not exist the process is dead. */
        return;
    }
   if (use_inkernel_interface) {  // 这里有个判断，是否是用kernel，不用kernel的话后面讲
        return;
    }

最后就是写了一个path，这个就是节点/proc/[pid]/oom-score_adj

具体是按照什么规则查杀呢？查杀的配置厂商可以自己定制
LMK_TARGET 这个是设置查杀adj级别的

static void cmd_target(int ntargets, LMKD_CTRL_PACKET packet) {
    int i;
    struct lmk_target target;
    char minfree_str[PROPERTY_VALUE_MAX];
    char *pstr = minfree_str;
    char *pend = minfree_str + sizeof(minfree_str);
    static struct timespec last_req_tm;
    struct timespec curr_tm;

   ...

    lowmem_targets_size = ntargets;

    /* Override the last extra comma */
    pstr[-1] = '\0';
    property_set("sys.lmk.minfree_levels", minfree_str);

    if (has_inkernel_module) {
        char minfreestr[128];
        char killpriostr[128];

        minfreestr[0] = '\0';
        killpriostr[0] = '\0';

        for (i = 0; i < lowmem_targets_size; i++) {
            char val[40];

            if (i) {
                strlcat(minfreestr, ",", sizeof(minfreestr));
                strlcat(killpriostr, ",", sizeof(killpriostr));
            }

            snprintf(val, sizeof(val), "%d", use_inkernel_interface ? lowmem_minfree[i] : 0);
            strlcat(minfreestr, val, sizeof(minfreestr));
            snprintf(val, sizeof(val), "%d", use_inkernel_interface ? lowmem_adj[i] : 0);
            strlcat(killpriostr, val, sizeof(killpriostr));
        }

        writefilestring(INKERNEL_MINFREE_PATH, minfreestr, true);
        writefilestring(INKERNEL_ADJ_PATH, killpriostr, true);
    }
}

#define INKERNEL_MINFREE_PATH "/sys/module/lowmemorykiller/parameters/minfree"
#define INKERNEL_ADJ_PATH "/sys/module/lowmemorykiller/parameters/adj"

adb shell cat /sys/module/lowmemorykiller/parameters/minfree
18432,23040,27648,32256,55296,80640
adb shell cat /sys/module/lowmemorykiller/parameters/adj
0,100,200,300,900,906

系统默认是根据

private final int[] mOomAdj = new int[] {
            FOREGROUND_APP_ADJ, VISIBLE_APP_ADJ, PERCEPTIBLE_APP_ADJ,
            PERCEPTIBLE_LOW_APP_ADJ, CACHED_APP_MIN_ADJ, CACHED_APP_LMK_FIRST_ADJ
    };
    // These are the low-end OOM level limits.  This is appropriate for an
    // HVGA or smaller phone with less than 512MB.  Values are in KB.
    private final int[] mOomMinFreeLow = new int[] {
            12288, 18432, 24576,
            36864, 43008, 49152
    };
    // These are the high-end OOM level limits.  This is appropriate for a
    // 1280x800 or larger screen with around 1GB RAM.  Values are in KB.
    private final int[] mOomMinFreeHigh = new int[] {
            73728, 92160, 110592,
            129024, 147456, 184320
    };

private void updateOomLevels(int displayWidth, int displayHeight, boolean write) {
        // Scale buckets from avail memory: at 300MB we use the lowest values to
        // 700MB or more for the top values.
        float scaleMem = ((float) (mTotalMemMb - 350)) / (700 - 350);

        // Scale buckets from screen size.
        int minSize = 480 * 800;  //  384000
        int maxSize = 1280 * 800; // 1024000  230400 870400  .264
        float scaleDisp = ((float)(displayWidth * displayHeight) - minSize) / (maxSize - minSize);
        if (false) {
            Slog.i("XXXXXX", "scaleMem=" + scaleMem);
            Slog.i("XXXXXX", "scaleDisp=" + scaleDisp + " dw=" + displayWidth
                    + " dh=" + displayHeight);
        }

        float scale = scaleMem > scaleDisp ? scaleMem : scaleDisp;
        if (scale < 0) scale = 0;
        else if (scale > 1) scale = 1;
        int minfree_adj = Resources.getSystem().getInteger(
                com.android.internal.R.integer.config_lowMemoryKillerMinFreeKbytesAdjust);
        int minfree_abs = Resources.getSystem().getInteger(
                com.android.internal.R.integer.config_lowMemoryKillerMinFreeKbytesAbsolute);
        if (false) {
            Slog.i("XXXXXX", "minfree_adj=" + minfree_adj + " minfree_abs=" + minfree_abs);
        }

        final boolean is64bit = Build.SUPPORTED_64_BIT_ABIS.length > 0;

        for (int i = 0; i < mOomAdj.length; i++) {
            int low = mOomMinFreeLow[i];
            int high = mOomMinFreeHigh[i];
            if (is64bit) {
                // Increase the high min-free levels for cached processes for 64-bit
                if (i == 4) high = (high * 3) / 2;
                else if (i == 5) high = (high * 7) / 4;
            }
            mOomMinFree[i] = (int)(low + ((high - low) * scale));
        }

        if (minfree_abs >= 0) {
            for (int i = 0; i < mOomAdj.length; i++) {
                mOomMinFree[i] = (int)((float)minfree_abs * mOomMinFree[i]
                        / mOomMinFree[mOomAdj.length - 1]);
            }
        }

        if (minfree_adj != 0) {
            for (int i = 0; i < mOomAdj.length; i++) {
                mOomMinFree[i] += (int)((float) minfree_adj * mOomMinFree[i]
                        / mOomMinFree[mOomAdj.length - 1]);
                if (mOomMinFree[i] < 0) {
                    mOomMinFree[i] = 0;
                }
            }
        }

        // The maximum size we will restore a process from cached to background, when under
        // memory duress, is 1/3 the size we have reserved for kernel caches and other overhead
        // before killing background processes.
        mCachedRestoreLevel = (getMemLevel(ProcessList.CACHED_APP_MAX_ADJ) / 1024) / 3;

        // Ask the kernel to try to keep enough memory free to allocate 3 full
        // screen 32bpp buffers without entering direct reclaim.
        int reserve = displayWidth * displayHeight * 4 * 3 / 1024;
        int reserve_adj = Resources.getSystem().getInteger(
                com.android.internal.R.integer.config_extraFreeKbytesAdjust);
        int reserve_abs = Resources.getSystem().getInteger(
                com.android.internal.R.integer.config_extraFreeKbytesAbsolute);

        if (reserve_abs >= 0) {
            reserve = reserve_abs;
        }

        if (reserve_adj != 0) {
            reserve += reserve_adj;
            if (reserve < 0) {
                reserve = 0;
            }
        }

        if (write) {
            ByteBuffer buf = ByteBuffer.allocate(4 * (2 * mOomAdj.length + 1));
            buf.putInt(LMK_TARGET);
            for (int i = 0; i < mOomAdj.length; i++) {
                buf.putInt((mOomMinFree[i] * 1024)/PAGE_SIZE);
                buf.putInt(mOomAdj[i]);
            }

            writeLmkd(buf, null);  //写socket的值
            SystemProperties.set("sys.sysctl.extra_free_kbytes", Integer.toString(reserve));
        }
        // GB: 2048,3072,4096,6144,7168,8192
        // HC: 8192,10240,12288,14336,16384,20480
    }

system测主要是设置一些配置

kernel测

kernel是真正工作的地方
首先会向内核注册，register_shrinker就是内核内存管理模块注册，注册之后就会遵循linux内核内存管理标准，当内存不足需要回收内存的时候首先会走lowmemkill回收，如果仍然不够内存会走linux的oom_killer回收

static int __init lowmem_init(void)
{
    register_shrinker(&lowmem_shrinker);
    return 0;
}
static void __exit lowmem_exit(void)
{
    unregister_shrinker(&lowmem_shrinker);
}

linux内核内存都会注册

static struct shrinker lowmem_shrinker = {
    .scan_objects = lowmem_scan,
    .count_objects = lowmem_count,
    .seeks = DEFAULT_SEEKS * 16
};

其中.scan_objects = lowmem_scan，而scan_objects就是一个回调函数

unsigned long (*scan_objects)(struct shrinker *,
                      struct shrink_control *sc);
while (total_scan >= batch_size ||
           total_scan >= freeable) {
        unsigned long ret;
        unsigned long nr_to_scan = min(batch_size, total_scan);

        shrinkctl->nr_to_scan = nr_to_scan;
        ret = shrinker->scan_objects(shrinker, shrinkctl);
        if (ret == SHRINK_STOP)
            break;

当内核需要回收内存的时候会遍历注册的模块，首先走lowmemorykiller模块，走lowmem_scan函数

static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
{
    struct task_struct *tsk;
    struct task_struct *selected = NULL;
    unsigned long rem = 0;
    int tasksize;
    int i;
    short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
    int minfree = 0;
    int selected_tasksize = 0;
    short selected_oom_score_adj;
    int array_size = ARRAY_SIZE(lowmem_adj);
    int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages; //空闲页- 保留页
    int other_file = global_page_state(NR_FILE_PAGES) -
                        global_page_state(NR_SHMEM) -
                        total_swapcache_pages();  // 文件页 - 共享内存 - swapcache

    if (lowmem_adj_size < array_size)
        array_size = lowmem_adj_size;
    if (lowmem_minfree_size < array_size)
        array_size = lowmem_minfree_size;
    for (i = 0; i < array_size; i++) {  // 根据级别查找内存
        minfree = lowmem_minfree[i];  
        if (other_free < minfree && other_file < minfree) {
            min_score_adj = lowmem_adj[i];  //当前内存小于某一个级别
            break;
        }
    }

    lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n",
            sc->nr_to_scan, sc->gfp_mask, other_free,
            other_file, min_score_adj);

    if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
        lowmem_print(5, "lowmem_scan %lu, %x, return 0\n",
                 sc->nr_to_scan, sc->gfp_mask);
        return 0;
    }

    selected_oom_score_adj = min_score_adj; //selected_oom_score_adj 确定要查杀的级别

    rcu_read_lock();
    for_each_process(tsk) {
        struct task_struct *p;
        short oom_score_adj;

        if (tsk->flags & PF_KTHREAD)
            continue;

        p = find_lock_task_mm(tsk);
        if (!p)
            continue;

        if (test_tsk_thread_flag(p, TIF_MEMDIE) &&
            time_before_eq(jiffies, lowmem_deathpending_timeout)) {
            task_unlock(p);
            rcu_read_unlock();
            return 0;
        }
        oom_score_adj = p->signal->oom_score_adj;
        if (oom_score_adj < min_score_adj) {
            task_unlock(p);
            continue;
        }
        tasksize = get_mm_rss(p->mm);
        task_unlock(p);
        if (tasksize <= 0)
            continue;
        if (selected) {
            if (oom_score_adj < selected_oom_score_adj)
                continue;
            if (oom_score_adj == selected_oom_score_adj &&
                tasksize <= selected_tasksize)
                continue;
        }
        selected = p;  //找到要查杀的任务
        selected_tasksize = tasksize;
        selected_oom_score_adj = oom_score_adj;
        lowmem_print(2, "select '%s' (%d), adj %hd, size %d, to kill\n",
                 p->comm, p->pid, oom_score_adj, tasksize);
    }
    if (selected) {
        long cache_size = other_file * (long)(PAGE_SIZE / 1024);
        long cache_limit = minfree * (long)(PAGE_SIZE / 1024);
        long free = other_free * (long)(PAGE_SIZE / 1024);
        trace_lowmemory_kill(selected, cache_size, cache_limit, free);
        lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" \
                "   to free %ldkB on behalf of '%s' (%d) because\n" \
                "   cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" \
                "   Free memory is %ldkB above reserved\n",
                 selected->comm, selected->pid,
                 selected_oom_score_adj,
                 selected_tasksize * (long)(PAGE_SIZE / 1024),
                 current->comm, current->pid,
                 cache_size, cache_limit,
                 min_score_adj,
                 free);
        lowmem_deathpending_timeout = jiffies + HZ;
        set_tsk_thread_flag(selected, TIF_MEMDIE);
        send_sig(SIGKILL, selected, 0);  //发信号杀进程
        rem += selected_tasksize;
    }

    lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n",
             sc->nr_to_scan, sc->gfp_mask, rem);
    rcu_read_unlock();
    return rem;
}

不用kernel去查杀

/* default to old in-kernel interface if no memory pressure events */
static bool use_inkernel_interface = true;
static bool has_inkernel_module;

static void cmd_procprio(LMKD_CTRL_PACKET packet) {
 ...
 if (use_inkernel_interface) {
        return;
    }
 if (per_app_memcg) {
        if (params.oomadj >= 900) {
            soft_limit_mult = 0;
        } else if (params.oomadj >= 800) {
            soft_limit_mult = 0;
        } else if (params.oomadj >= 700) {
            soft_limit_mult = 0;
        } else if (params.oomadj >= 600) {
            // Launcher should be perceptible, don't kill it.
            params.oomadj = 200;
            soft_limit_mult = 1;
        } else if (params.oomadj >= 500) {
            soft_limit_mult = 0;
        } else if (params.oomadj >= 400) {
            soft_limit_mult = 0;
        } else if (params.oomadj >= 300) {
            soft_limit_mult = 1;
        } else if (params.oomadj >= 200) {
            soft_limit_mult = 8;
        } else if (params.oomadj >= 100) {
            soft_limit_mult = 10;
        } else if (params.oomadj >=   0) {
            soft_limit_mult = 20;
        } else {
            // Persistent processes will have a large
            // soft limit 512MB.
            soft_limit_mult = 64;
        }

        snprintf(path, sizeof(path), MEMCG_SYSFS_PATH
                 "apps/uid_%d/pid_%d/memory.soft_limit_in_bytes",
                 params.uid, params.pid);
        snprintf(val, sizeof(val), "%d", soft_limit_mult * EIGHT_MEGA);

        /*
         * system_server process has no memcg under /dev/memcg/apps but should be
         * registered with lmkd. This is the best way so far to identify it.
         */
        is_system_server = (params.oomadj == SYSTEM_ADJ &&
                            (pwdrec = getpwnam("system")) != NULL &&
                            params.uid == pwdrec->pw_uid);
        writefilestring(path, val, !is_system_server);
    }

    procp = pid_lookup(params.pid);
    if (!procp) {
            procp = malloc(sizeof(struct proc));
            if (!procp) {
                // Oh, the irony.  May need to rebuild our state.
                return;
            }

            procp->pid = params.pid;
            procp->uid = params.uid;
            procp->oomadj = params.oomadj;
            proc_insert(procp);
    } else {
        proc_unslot(procp);
        procp->oomadj = params.oomadj;
        proc_slot(procp);
    }
}

static int init(void) {
  ...
   has_inkernel_module = !access(INKERNEL_MINFREE_PATH, W_OK);
    use_inkernel_interface = has_inkernel_module;

这个需要厂商配置，看了oppo android 9 小米都有INKERNEL_MINFREE_PATH目录，意思就是走kernel的路径

如果不走kernel初始化的时候

static bool init_mp_common(enum vmpressure_level level) {
    int mpfd;
    int evfd;
    int evctlfd;
    char buf[256];
    struct epoll_event epev;
    int ret;
    int level_idx = (int)level;
    const char *levelstr = level_name[level_idx];

    /* gid containing AID_SYSTEM required */
    mpfd = open(MEMCG_SYSFS_PATH "memory.pressure_level", O_RDONLY | O_CLOEXEC);
    if (mpfd < 0) {
        ALOGI("No kernel memory.pressure_level support (errno=%d)", errno);
        goto err_open_mpfd;
    }

    evctlfd = open(MEMCG_SYSFS_PATH "cgroup.event_control", O_WRONLY | O_CLOEXEC);
    if (evctlfd < 0) {
        ALOGI("No kernel memory cgroup event control (errno=%d)", errno);
        goto err_open_evctlfd;
    }

    evfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if (evfd < 0) {
        ALOGE("eventfd failed for level %s; errno=%d", levelstr, errno);
        goto err_eventfd;
    }

    ret = snprintf(buf, sizeof(buf), "%d %d %s", evfd, mpfd, levelstr);
    if (ret >= (ssize_t)sizeof(buf)) {
        ALOGE("cgroup.event_control line overflow for level %s", levelstr);
        goto err;
    }

    ret = TEMP_FAILURE_RETRY(write(evctlfd, buf, strlen(buf) + 1));
    if (ret == -1) {
        ALOGE("cgroup.event_control write failed for level %s; errno=%d",
              levelstr, errno);
        goto err;
    }

    epev.events = EPOLLIN;
    /* use data to store event level */
    vmpressure_hinfo[level_idx].data = level_idx;
    vmpressure_hinfo[level_idx].handler = mp_event_common;
    epev.data.ptr = (void *)&vmpressure_hinfo[level_idx];
    ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, evfd, &epev);
    if (ret == -1) {
        ALOGE("epoll_ctl for level %s failed; errno=%d", levelstr, errno);
        goto err;
    }
    maxevents++;
    mpevfd[level] = evfd;
    close(evctlfd);
    return true;

err:
    close(evfd);
err_eventfd:
    close(evctlfd);
err_open_evctlfd:
    close(mpfd);
err_open_mpfd:
    return false;
}
#define MEMCG_SYSFS_PATH "/dev/memcg/"
#define MEMCG_MEMORY_USAGE "/dev/memcg/memory.usage_in_bytes"
#define MEMCG_MEMORYSW_USAGE "/dev/memcg/memory.memsw.usage_in_bytes"

oppo android 9 小米都没有/dev/memcg/目录，证实走的是kernel
走user空间这块，主要逻辑就在lmkd里面mp_event_common函数

if (use_minfree_levels) {
        int i;

        other_free = mi.field.nr_free_pages - zi.field.totalreserve_pages;
        if (mi.field.nr_file_pages > (mi.field.shmem + mi.field.unevictable + mi.field.swap_cached)) {
            other_file = (mi.field.nr_file_pages - mi.field.shmem -
                          mi.field.unevictable - mi.field.swap_cached);
        } else {
            other_file = 0;
        }

        min_score_adj = OOM_SCORE_ADJ_MAX + 1;
        for (i = 0; i < lowmem_targets_size; i++) {
            minfree = lowmem_minfree[i];
            if (other_free < minfree && other_file < minfree) {
                min_score_adj = lowmem_adj[i];
                break;
            }
        }

        if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
            if (debug_process_killing) {
                ALOGI("Ignore %s memory pressure event "
                      "(free memory=%ldkB, cache=%ldkB, limit=%ldkB)",
                      level_name[level], other_free * page_k, other_file * page_k,
                      (long)lowmem_minfree[lowmem_targets_size - 1] * page_k);
            }
            return;
        }

        goto do_kill;
    }

    if (level == VMPRESS_LEVEL_LOW) {
        record_low_pressure_levels(&mi);
    }

    if (level_oomadj[level] > OOM_SCORE_ADJ_MAX) {
        /* Do not monitor this pressure level */
        return;
    }

    if ((mem_usage = get_memory_usage(&mem_usage_file_data)) < 0) {
        goto do_kill;
    }
    if ((memsw_usage = get_memory_usage(&memsw_usage_file_data)) < 0) {
        goto do_kill;
    }

跟kernel查杀策略类似，但是可以更加细分

if (per_app_memcg) {
        if (params.oomadj >= 900) {
            soft_limit_mult = 0;
        } else if (params.oomadj >= 800) {
            soft_limit_mult = 0;
        } else if (params.oomadj >= 700) {
            soft_limit_mult = 0;
        } else if (params.oomadj >= 600) {
            // Launcher should be perceptible, don't kill it.
            params.oomadj = 200;
            soft_limit_mult = 1;
        } else if (params.oomadj >= 500) {
            soft_limit_mult = 0;
        } else if (params.oomadj >= 400) {
            soft_limit_mult = 0;
        } else if (params.oomadj >= 300) {
            soft_limit_mult = 1;
        } else if (params.oomadj >= 200) {
            soft_limit_mult = 8;
        } else if (params.oomadj >= 100) {
            soft_limit_mult = 10;
        } else if (params.oomadj >=   0) {
            soft_limit_mult = 20;
        } else {
            // Persistent processes will have a large
            // soft limit 512MB.
            soft_limit_mult = 64;
        }

        snprintf(path, sizeof(path), MEMCG_SYSFS_PATH
                 "apps/uid_%d/pid_%d/memory.soft_limit_in_bytes",
                 params.uid, params.pid);
        snprintf(val, sizeof(val), "%d", soft_limit_mult * EIGHT_MEGA);

        /*
         * system_server process has no memcg under /dev/memcg/apps but should be
         * registered with lmkd. This is the best way so far to identify it.
         */
        is_system_server = (params.oomadj == SYSTEM_ADJ &&
                            (pwdrec = getpwnam("system")) != NULL &&
                            params.uid == pwdrec->pw_uid);
        writefilestring(path, val, !is_system_server);
    }

杀进程

do_kill:
    if (low_ram_device) {
        /* For Go devices kill only one task */
        if (find_and_kill_process(level_oomadj[level]) == 0) {
            if (debug_process_killing) {
                ALOGI("Nothing to kill");
            }
        } else {
            meminfo_log(&mi);
        }
    } else {
        int pages_freed;
        static struct timespec last_report_tm;
        static unsigned long report_skip_count = 0;

        if (!use_minfree_levels) {
            /* Free up enough memory to downgrate the memory pressure to low level */
            if (mi.field.nr_free_pages >= low_pressure_mem.max_nr_free_pages) {
                if (debug_process_killing) {
                    ALOGI("Ignoring pressure since more memory is "
                        "available (%" PRId64 ") than watermark (%" PRId64 ")",
                        mi.field.nr_free_pages, low_pressure_mem.max_nr_free_pages);
                }
                return;
            }
            min_score_adj = level_oomadj[level];
        }

        pages_freed = find_and_kill_process(min_score_adj);


static int find_and_kill_process(int min_score_adj) {
    int i;
    int killed_size = 0;

#ifdef LMKD_LOG_STATS
    bool lmk_state_change_start = false;
#endif

    for (i = OOM_SCORE_ADJ_MAX; i >= min_score_adj; i--) {
        struct proc *procp;

        while (true) {
            procp = kill_heaviest_task ?
                proc_get_heaviest(i) : proc_adj_lru(i);

            if (!procp)
                break;

            killed_size = kill_one_process(procp, min_score_adj);
            if (killed_size >= 0) {
#ifdef LMKD_LOG_STATS
                if (enable_stats_log && !lmk_state_change_start) {
                    lmk_state_change_start = true;
                    stats_write_lmk_state_changed(log_ctx, LMK_STATE_CHANGED,
                                                  LMK_STATE_CHANGE_START);
                }
#endif
                break;

主动杀

removeLruProcessLocked

final void removeLruProcessLocked(ProcessRecord app) {
        int lrui = mLruProcesses.lastIndexOf(app);
        if (lrui >= 0) {
            if (!app.killed) {
                if (app.isPersistent()) {
                    Slog.w(TAG, "Removing persistent process that hasn't been killed: " + app);
                } else {
                    Slog.wtfStack(TAG, "Removing process that hasn't been killed: " + app);
                    if (app.pid > 0) {
                        killProcessQuiet(app.pid);
                        ProcessList.killProcessGroup(app.uid, app.pid);
                    } else {
                        app.pendingStart = false;
                    }
                }

removeLruProcessLocked 调用killProcessGroup

static void killProcessGroup(int uid, int pid) {
        /* static; one-time init here */
        if (sKillHandler != null) {
            sKillHandler.sendMessage(
                    sKillHandler.obtainMessage(KillHandler.KILL_PROCESS_GROUP_MSG, uid, pid));
        } else {
            Slog.w(TAG, "Asked to kill process group before system bringup!");
            Process.killProcessGroup(uid, pid);
        }
    }

最终都会调到

 case KILL_PROCESS_GROUP_MSG:
                    Trace.traceBegin(Trace.TRACE_TAG_ACTIVITY_MANAGER, "killProcessGroup");
                    Process.killProcessGroup(msg.arg1 /* uid */, msg.arg2 /* pid */);
                    Trace.traceEnd(Trace.TRACE_TAG_ACTIVITY_MANAGER);
Process.killProcessGroup(msg.arg1 /* uid */, msg.arg2 /* pid */);

调到native

// Erase all pids that will be killed when we kill the process groups.
    for (auto it = pids.begin(); it != pids.end();) {
        pid_t pgid = getpgid(*it);
        if (pgids.count(pgid) == 1) {
            it = pids.erase(it);
        } else {
            ++it;
        }
    }

    // Kill all process groups.
    for (const auto pgid : pgids) {
        LOG(VERBOSE) << "Killing process group " << -pgid << " in uid " << uid
                     << " as part of process cgroup " << initialPid;

        if (kill(-pgid, signal) == -1 && errno != ESRCH) {
            PLOG(WARNING) << "kill(" << -pgid << ", " << signal << ") failed";
        }
    }

    // Kill remaining pids.
    for (const auto pid : pids) {
        LOG(VERBOSE) << "Killing pid " << pid << " in uid " << uid << " as part of process cgroup "
                     << initialPid;

        if (kill(pid, signal) == -1 && errno != ESRCH) {
            PLOG(WARNING) << "kill(" << pid << ", " << signal << ") failed";
        }
    }

整个进程组杀掉