user版本连接wifi必现重启问题总结

问题现象

  • 复现步骤
    • Android 7.0平台(刚bring up完成)
    • user版本只要连接特定wifi, system_server进程就必现native crash。
    • userdebug版本没有此问题。

分析定位

初步分析

  • tombstone文件如下

    ABI: 'x86_64'
    pid: 5891, tid: 7173, name: Thread-8  >>> system_server <<<
    signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x3400a6
      rax 000000006f528f00  rbx 00007fa0a04f7e30  rcx 00007fa0a04f7e01  rdx 0000000000000001
      rsi 00007fa0a04f7ed4  rdi 000000006f1ed630
      r8  0000000000000002  r9  00007fa0a04f7d38  r10 000000006f1c38d8  r11 0000000000000000
      r12 0000000000200015  r13 000000000034002e  r14 00007fa0a04f7ed8  r15 000000006f528f00
      cs  0000000000000033  ss  000000000000002b
      rip 00007fa0bde2338d  rbp 00007fa0a04f7d80  rsp 00007fa0a04f7be0  eflags 0000000000010206
    
    backtrace:
      #00 pc 000000000057738d  /system/lib64/libart.so (_ZN3art12InvokeMethodERKNS_33ScopedObjectAccessAlreadyRunnableEP8_jobjectS4_S4_m+125)
      #01 pc 00000000004cfad8  /system/lib64/libart.so (_ZN3artL24Constructor_newInstance0EP7_JNIEnvP8_jobjectP13_jobjectArray+1432)
      #02 pc 00000000006e3d4d  /system/framework/x86_64/boot-core-oj.oat (offset 0x660000) (java.io.UnixFileSystem.canonicalize0 [DEDUPED]+235)
      #03 pc 0000000000b22caf  /system/framework/x86_64/boot-core-oj.oat (offset 0x660000) (sun.security.x509.X500Name.asX500Principal+157)
      #04 pc 0000000000b327a7  /system/framework/x86_64/boot-core-oj.oat (offset 0x660000) (sun.security.x509.X509CertInfo.getX500Name+517)
      #05 pc 0000000000b34b45  /system/framework/x86_64/boot-core-oj.oat (offset 0x660000) (sun.security.x509.X509CertInfo.get+835)
      #06 pc 0000000000b2fe15  /system/framework/x86_64/boot-core-oj.oat (offset 0x660000) (sun.security.x509.X509CertImpl.getSubjectX500Principal+99)
      #07 pc 0000000000a93baf  /system/framework/x86_64/boot-core-oj.oat (offset 0x660000) (sun.security.provider.certpath.PolicyChecker.mergeExplicitPolicy+93)
      #08 pc 0000000000a9341e  /system/framework/x86_64/boot-core-oj.oat (offset 0x660000) (sun.security.provider.certpath.PolicyChecker.checkPolicy+2652)
      #09 pc 0000000000a98df1  /system/framework/x86_64/boot-core-oj.oat (offset 0x660000) (sun.security.provider.certpath.PolicyChecker.check+95)
      #10 pc 0000000000a91f9b  /system/framework/x86_64/boot-core-oj.oat (offset 0x660000) (sun.security.provider.certpath.PKIXMasterCertPathValidator.validate+2265)
      #11 pc 0000000000a907d1  /system/framework/x86_64/boot-core-oj.oat (offset 0x660000) (sun.security.provider.certpath.PKIXCertPathValidator.validate+2895)
      #12 pc 0000000000a911cc  /system/framework/x86_64/boot-core-oj.oat (offset 0x660000) (sun.security.provider.certpath.PKIXCertPathValidator.validate+1546)
      #13 pc 0000000000a9166b  /system/framework/x86_64/boot-core-oj.oat (offset 0x660000) (sun.security.provider.certpath.PKIXCertPathValidator.engineValidate+233)
      #14 pc 0000000000819f52  /system/framework/x86_64/boot-core-oj.oat (offset 0x660000) (java.security.cert.CertPathValidator.validate+64)
      #15 pc 00000000000b6d14  /system/framework/x86_64/boot-conscrypt.oat (offset 0x70000) (com.android.org.conscrypt.TrustManagerImpl.verifyChain+1266)
      #16 pc 00000000000b5fab  /system/framework/x86_64/boot-conscrypt.oat (offset 0x70000) (com.android.org.conscrypt.TrustManagerImpl.checkTrustedRecursive+2697)
      #17 pc 00000000000b57f1  /system/framework/x86_64/boot-conscrypt.oat (offset 0x70000) (com.android.org.conscrypt.TrustManagerImpl.checkTrustedRecursive+719)
      #18 pc 00000000000b5bb0  /system/framework/x86_64/boot-conscrypt.oat (offset 0x70000) (com.android.org.conscrypt.TrustManagerImpl.checkTrustedRecursive+1678)
      #19 pc 00000000000b5207  /system/framework/x86_64/boot-conscrypt.oat (offset 0x70000) (com.android.org.conscrypt.TrustManagerImpl.checkTrusted+645)
      #20 pc 00000000000b54a7  /system/framework/x86_64/boot-conscrypt.oat (offset 0x70000) (com.android.org.conscrypt.TrustManagerImpl.checkTrusted+421)
      #21 pc 00000000000b7713  /system/framework/x86_64/boot-conscrypt.oat (offset 0x70000) (com.android.org.conscrypt.TrustManagerImpl.checkServerTrusted+273)
      #22 pc 00000000000acf15  /system/framework/x86_64/boot-conscrypt.oat (offset 0x70000) (com.android.org.conscrypt.Platform.checkServerTrusted+323)
      #23 pc 00000000000a4885  /system/framework/x86_64/boot-conscrypt.oat (offset 0x70000) (com.android.org.conscrypt.OpenSSLSocketImpl.verifyCertificateChain+787)
      #24 pc 00000000001a6564  /system/lib64/libart.so (art_quick_invoke_stub+756)
      #25 pc 00000000001b4727  /system/lib64/libart.so (_ZN3art9ArtMethod6InvokeEPNS_6ThreadEPjjPNS_6JValueEPKc+231)
      #26 pc 0000000000575967  /system/lib64/libart.so (_ZN3artL18InvokeWithArgArrayERKNS_33ScopedObjectAccessAlreadyRunnableEPNS_9ArtMethodEPNS_8ArgArrayEPNS_6JValueEPKc+87)
      #27 pc 00000000005771be  /system/lib64/libart.so (_ZN3art35InvokeVirtualOrInterfaceWithVarArgsERKNS_33ScopedObjectAccessAlreadyRunnableEP8_jobjectP10_jmethodIDP13__va_list_tag+382)
      #28 pc 000000000046507c  /system/lib64/libart.so (_ZN3art3JNI15CallVoidMethodVEP7_JNIEnvP8_jobjectP10_jmethodIDP13__va_list_tag+860)
      #29 pc 000000000001eb51  /system/lib64/libjavacrypto.so (_ZN7_JNIEnv14CallVoidMethodEP8_jobjectP10_jmethodIDz+161)
      #30 pc 000000000001f3e7  /system/lib64/libjavacrypto.so
      #31 pc 0000000000021468  /system/lib64/libssl.so
      #32 pc 0000000000015fd8  /system/lib64/libssl.so
      #33 pc 00000000000149ab  /system/lib64/libssl.so
      #34 pc 000000000001954b  /system/lib64/libjavacrypto.so
      #35 pc 000000000008061a  /system/framework/x86_64/boot-conscrypt.oat (offset 0x70000) (com.android.org.conscrypt.NativeCrypto.SSL_do_handshake+376)
      #36 pc 00000000000a33aa  /system/framework/x86_64/boot-conscrypt.oat (offset 0x70000) (com.android.org.conscrypt.OpenSSLSocketImpl.startHandshake+1944)
      #37 pc 00000000000986ba  /system/framework/x86_64/boot-okhttp.oat (offset 0x8f000) (com.android.okhttp.Connection.connectTls+488)
      #38 pc 00000000000981d2  /system/framework/x86_64/boot-okhttp.oat (offset 0x8f000) (com.android.okhttp.Connection.connectSocket+192)
      #39 pc 000000000009a17e  /system/framework/x86_64/boot-okhttp.oat (offset 0x8f000) (com.android.okhttp.Connection.connect+860)
      #40 pc 000000000009a4ed  /system/framework/x86_64/boot-okhttp.oat (offset 0x8f000) (com.android.okhttp.Connection.connectAndSetOwner+203)
      #41 pc 00000000000b0ffd  /system/framework/x86_64/boot-okhttp.oat (offset 0x8f000) (com.android.okhttp.OkHttpClient$1.connectAndSetOwner+75)
      #42 pc 00000000000cede7  /system/framework/x86_64/boot-okhttp.oat (offset 0x8f000) (com.android.okhttp.internal.http.HttpEngine.connect+501)
      #43 pc 00000000000d32d5  /system/framework/x86_64/boot-okhttp.oat (offset 0x8f000) (com.android.okhttp.internal.http.HttpEngine.sendRequest+755)
      #44 pc 00000000000dba50  /system/framework/x86_64/boot-okhttp.oat (offset 0x8f000) (com.android.okhttp.internal.huc.HttpURLConnectionImpl.execute+222)
      #45 pc 00000000000dbfe3  /system/framework/x86_64/boot-okhttp.oat (offset 0x8f000) (com.android.okhttp.internal.huc.HttpURLConnectionImpl.getResponse+145)
      #46 pc 00000000000de079  /system/framework/x86_64/boot-okhttp.oat (offset 0x8f000) (com.android.okhttp.internal.huc.HttpURLConnectionImpl.getInputStream+135)
      #47 pc 00000000000e0648  /system/framework/x86_64/boot-okhttp.oat (offset 0x8f000) (com.android.okhttp.internal.huc.HttpsURLConnectionImpl.getInputStream+54)
      #48 pc 00000000011d9af0  /system/framework/oat/x86_64/services.odex (offset 0xc82000)
    
  • 根据堆栈,初步分析ArtMethod*指向的内存被篡改.

    由于是user版本, 只有一个tombstone文件,无法获取更多有用信息.

  • 调整版本配置,获取core dump后,看到出问题附近内存有RSA encryption相关的数据

    ArtMethod 由前面的abstract_method计算而来,m = 0x000000006f528f00,
    该地址附近的内存都被一些加密相关的字符篡改了.

    000000006f528f00 004900570034002e 002e003100480054  ..4.W.I.T.H.1...
    000000006f528f10 00340038002e0032 00310031002e0030  2...8.4.0...1.1.
    000000006f528f20 0039003400350033 0031002e0031002e  3.5.4.9...1...1.
    000000006f528f30 000000340031002e 000000006f2139b8  ..1.4....9!o....
    000000006f528f40 1a1723730000000d 0032004100480053  ....s#..S.H.A.2.
    000000006f528f50 0049005700360035 0053005200480054  5.6.W.I.T.H.R.S.
    000000006f528f60 0000000000000041 000000006f2139b8  A........9!o....
    000000006f528f70 968ed33600000017 0032004100480053  ....6...S.H.A.2.
    000000006f528f80 0069005700360035 0053005200680074  5.6.W.i.t.h.R.S.
    000000006f528f90 0063006e00450041 0074007000790072  A.E.n.c.r.y.p.t.
    000000006f528fa0 0000006e006f0069 000000006f2139b8  i.o.n....9!o....
    000000006f528fb0 eeb292290000002e 00360031002e0032  ....)...2...1.6.
    000000006f528fc0 003000340038002e 0031002e0031002e  ..8.4.0...1...1.
    000000006f528fd0 0033002e00310030 0032002e0034002e  0.1...3...4...2.
    

user/userdebug版本区别

  • 从ART虚拟机角度而言,user和userdebug配置的dexpreopt不同:user版本配置为true, userdebug版本配置为false
    • user版本配置为false后,问题不再出现
    • userdebug版本配置为true后,问题必现
  • 为便于调试,基于userdebug版本,将dexpreopt配置为true,编译出新的image。以上现象表明问题与dexpreopt有关联。

证明非内存篡改

  • 出问题ArtMethod* = 0x7042af00 位于下面的*.art文件

    0x700cb000         0x705f1000   0x526000        0x0 /data/dalvik-cache/x86_64/system@framework@boot-core-oj.art
    
  • 查看ArtMethod*的52个bytes内容

    (gdb) x /52xb 0x7042af00
    0x7042af00:     0x2e    0x00    0x38    0x00    0x34    0x00    0x30    0x00
    0x7042af08:     0x2e    0x00    0x31    0x00    0x2e    0x00    0x31    0x00
    0x7042af10:     0x30    0x00    0x31    0x00    0x2e    0x00    0x33    0x00
    0x7042af18:     0x2e    0x00    0x34    0x00    0x2e    0x00    0x32    0x00
    0x7042af20:     0x2e    0x00    0x34    0x00    0x57    0x00    0x49    0x00
    0x7042af28:     0x54    0x00    0x48    0x00    0x31    0x00    0x2e    0x00
    0x7042af30:     0x32    0x00    0x2e    0x00
    
  • 查看*.art的指定偏移的52个bytes

    $ hexdump -C -s 3538688 -n 52 boot-core-oj.art 
    0035ff00  2e 00 38 00 34 00 30 00  2e 00 31 00 2e 00 31 00  |..8.4.0...1...1.|
    0035ff10  30 00 31 00 2e 00 33 00  2e 00 34 00 2e 00 32 00  |0.1...3...4...2.|
    0035ff20  2e 00 34 00 57 00 49 00  54 00 48 00 31 00 2e 00  |..4.W.I.T.H.1...|
    0035ff30  32 00 2e 00                                       |2...|
    0035ff34
    
  • 通过以上对比,说明这个boot-core-oj.art 里面的RSA加密之类的东西已经存在了, 并不是加载到内存后被篡改成这样的.

    目前想到有两种可能性:

    • ArtMethod地址错了
    • boot-core-oj.art里面的内容生成的时候就错了(只要访问到这里的内存就报错).

    另外,https模块同事分析java堆栈,发现函数调用存在异常,逻辑上根本不可能调用到。所以上面第二种可能性大些。

board对比差异

  • 对比同分支下其它board生成image的差异

    不论是arm平台还是x86平台,出问题board生成的相关boot image有些奇怪.
    根据文件大小,

    • boot.oat应是boot-radio_interactor_common.oat
    • boot-core-oj.oat应是boot.oat
board_image_vs.png
  • 对比环境变量

    • 出问题board
      BOOTCLASSPATH=/system/framework/radio_interactor_common.jar:/system/framework/core-oj.jar:/system/framework/core-libart.jar:/system/framework/conscrypt.jar:/system/framework/okhttp.jar:/system/framework/core-junit.jar:/system/framework/bouncycastle.jar:/system/framework/ext.jar:/system/framework/framework.jar:/system/framework/telephony-common.jar:/system/framework/voip-common.jar:/system/framework/ims-common.jar:/system/framework/apache-xml.jar:/system/framework/org.apache.http.legacy.boot.jar
      
    • 正常board
      BOOTCLASSPATH=/system/framework/core-oj.jar:/system/framework/core-libart.jar:/system/framework/conscrypt.jar:/system/framework/okhttp.jar:/system/framework/core-junit.jar:/system/framework/bouncycastle.jar:/system/framework/ext.jar:/system/framework/framework.jar:/system/framework/telephony-common.jar:/system/framework/voip-common.jar:/system/framework/ims-common.jar:/system/framework/apache-xml.jar:/system/framework/org.apache.http.legacy.boot.jar:/system/framework/radio_interactor_common.jar
      
    • 对比可发现,radio_interactor_common.jar在BOOTCLASSPATH中的顺序不同.
      而通过走读代码, build的相关描述如下:
    # dex preopt on the bootclasspath produces multiple files.  The first dex file
    # is converted into to boot.art (to match the legacy assumption that boot.art
    # exists), and the rest are converted to boot-<name>.art.
    # In addition, each .art file has an associated .oat file.
    LIBART_TARGET_BOOT_ART_EXTRA_FILES := $(foreach jar,$(wordlist 2,999,$(LIBART_TARGET_BOOT_JARS)),boot-$(jar).art boot-$(jar).oat)
    LIBART_TARGET_BOOT_ART_EXTRA_FILES += boot.oat
    

    以及

    # The order of PRODUCT_BOOT_JARS matters.
    PRODUCT_BOOT_JARS := \
      core-oj \
      core-libart \
      conscrypt \
      okhttp \
      core-junit \
      bouncycastle \
      ext \
      framework \
      telephony-common \
      voip-common \
      ims-common \
      apache-xml \
      org.apache.http.legacy.boot
    

    可知build系统默认将PRODUCT_BOOT_JARS中的第一个编译为boot.oat/boot.art, 其它则编译为boot-${jar}.oat/boot-${jar}.art文件

    这说明board配置出了问题: radio_interactor_common被错误地配置到了PRODUCT_BOOT_JARS的最前面。

Root Cause

  • 查看radio_interactor_common的使用
PRODUCT_BOOT_JARS += radio_interactor_common
  • 再查看相关的调用顺序,最终找到root cause。
$(call inherit-product, $(PLATDIR)/common/device.mk)
$(call inherit-product, $(SRC_TARGET_DIR)/product/core_64_bit.mk)
$(call inherit-product, $(SRC_TARGET_DIR)/product/aosp_base_telephony.mk)
$(call inherit-product, $(PLATDIR)/common/proprietories.mk)
- device.mk最终include到前面的  PRODUCT_BOOT_JARS += radio_interactor_common
- $(SRC_TARGET_DIR)/product/aosp_base_telephony.mk最终会include到系统默认的boot class: 
build/target/product/core_minimal.mk
- 对比正常board, 都是先include系统默认的boot class, 再追加radio_interactor_common

解决方案

  • 验证
    • 调整*.mk文件include顺序后, 查看编译后的image文件
      $ tree system/framework/x86*
      system/framework/x86
      ├── boot-apache-xml.art
      ├── boot-apache-xml.oat
      ├── boot.art
      ├── boot-bouncycastle.art
      ├── boot-bouncycastle.oat
      ├── boot-conscrypt.art
      ├── boot-conscrypt.oat
      ├── boot-core-junit.art
      ├── boot-core-junit.oat
      ├── boot-core-libart.art
      ├── boot-core-libart.oat
      ├── boot-ext.art
      ├── boot-ext.oat
      ├── boot-framework.art
      ├── boot-framework.oat
      ├── boot-ims-common.art
      ├── boot-ims-common.oat
      ├── boot.oat
      ├── boot-okhttp.art
      ├── boot-okhttp.oat
      ├── boot-org.apache.http.legacy.boot.art
      ├── boot-org.apache.http.legacy.boot.oat
      ├── boot-radio_interactor_common.art
      ├── boot-radio_interactor_common.oat
      ├── boot-telephony-common.art
      ├── boot-telephony-common.oat
      ├── boot-voip-common.art
      └── boot-voip-common.oat
      system/framework/x86_64
      ├── boot-apache-xml.art
      ├── boot-apache-xml.oat
      ├── boot.art
      ├── boot-bouncycastle.art
      ├── boot-bouncycastle.oat
      ├── boot-conscrypt.art
      ├── boot-conscrypt.oat
      ├── boot-core-junit.art
      ├── boot-core-junit.oat
      ├── boot-core-libart.art
      ├── boot-core-libart.oat
      ├── boot-ext.art
      ├── boot-ext.oat
      ├── boot-framework.art
      ├── boot-framework.oat
      ├── boot-ims-common.art
      ├── boot-ims-common.oat
      ├── boot.oat
      ├── boot-okhttp.art
      ├── boot-okhttp.oat
      ├── boot-org.apache.http.legacy.boot.art
      ├── boot-org.apache.http.legacy.boot.oat
      ├── boot-radio_interactor_common.art
      ├── boot-radio_interactor_common.oat
      ├── boot-telephony-common.art
      ├── boot-telephony-common.oat
      ├── boot-voip-common.art
      └── boot-voip-common.oat
      
      
    • 编译出新版本后,问题不再复现。
  • 为防止以后再掉进这样的坑里,修改build系统代码,添加check机制:当检测到配置不对时报错,这样在bringup阶段就暴露出问题.
    diff --git a/core/dex_preopt_libart.mk b/core/dex_preopt_libart.mk
    index b469dc0..7145fed 100644
    --- a/core/dex_preopt_libart.mk
    +++ b/core/dex_preopt_libart.mk
    @@ -87,6 +87,9 @@ LIBART_TARGET_BOOT_DEX_FILES := $(foreach jar,$(LIBART_TARGET_BOOT_JARS),$(call
     # is converted into to boot.art (to match the legacy assumption that boot.art
     # exists), and the rest are converted to boot-<name>.art.
     # In addition, each .art file has an associated .oat file.
    +ifneq (core-oj,$(word 1,$(LIBART_TARGET_BOOT_JARS)))
    +$(error "core-oj" must be the first file in <PRODUCT_BOOT_JARS> but now is "$(word 1,$(LIBART_TARGET_BOOT_JARS))")
    +endif
     LIBART_TARGET_BOOT_ART_EXTRA_FILES := $(foreach jar,$(wordlist 2,999,$(LIBART_TARGET_BOOT_JARS)),boot-$(jar).art boot-$(jar).oat)
     LIBART_TARGET_BOOT_ART_EXTRA_FILES += boot.oat
    
    
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容

  • Spring Cloud为开发人员提供了快速构建分布式系统中一些常见模式的工具(例如配置管理,服务发现,断路器,智...
    卡卡罗2017阅读 134,859评论 18 139
  • Spring Boot 参考指南 介绍 转载自:https://www.gitbook.com/book/qbgb...
    毛宇鹏阅读 46,935评论 6 342
  • 问题现象 复现步骤Android 7.0平台 + W16.49.4/W16.50.4版本monkey测试时低概率出...
    dumphex阅读 4,736评论 0 4
  • 才德的妇人谁能得着呢?她的价值远胜过珍珠。 31:11她丈夫心里倚靠她,必不缺少利益。 31:12她一生使...
    要心里柔和谦卑阅读 962评论 0 1
  • 记忆是深沉的, 记忆中的人物, 想着你笑, 想着你哭, 莫名的想笑, 想和你联系, 总找不到联系理由。
    莫离痕阅读 165评论 0 0