借用网上的一张图概述Socket编程模型: LocalSocket也是按照这个模型来组织的。与图中略有不同的是,Android LocalSocket是本地Socket,不需要TCP三次握手。 对于socket编程不熟悉的朋友,需要提取的内容有:
对于服务端需要做好以下准备,才能被客户端连接: 1.调用socket()函数创建一个socket,这个socket是用于监听和接受客户端的连接请求的; 2.调用bind()函数绑定通信地址,对于网络通信需要绑定的地址为IP:PORT的形式,而对于Android本地通信而言,需要绑定的地址为一个本地文件; 3.调用listen()函数监听是否有客户端连接请求,能否连接成功需要后续accept()的判断; 4.调用accept()函数接受和处理客户端的连接请求,如果连接成功返回一个新的socket,这个新的socket是用来和客户端进行通信的。
对于客户端需要做以下准备去连接服务端: 1.调用socket()函数创建一个socket 2.调用connect()函数进行连接
了解了以上基本脉络后,接下来以Android的installd进程为例详细解析Android的LocalSocket(基于Android 7.0源码)。
installd进程是在init进程中启动的,相关启动内容:
/* 文件:frameworks/native/cmds/installd/installd.rc */ service installd /system/bin/installd class main socket installd stream 600 system system
从字面上可以看到,installd.rc中使用socket installd stream 600 system system创建了与installd进程相关的socket,其具体的创建流程是在init中完成的,接下来过一下这个流程。
/* 文件:system/core/init/service.cpp */ Service::OptionHandlerMap::Map& Service::OptionHandlerMap::map() const { constexpr std::size_t kMax = std::numeric_limits<std::size_t>::max(); static const Map option_handlers = { {"class", {1, 1, &Service::HandleClass}}, {"console", {0, 0, &Service::HandleConsole}}, {"critical", {0, 0, &Service::HandleCritical}}, {"disabled", {0, 0, &Service::HandleDisabled}}, {"group", {1, NR_SVC_SUPP_GIDS + 1, &Service::HandleGroup}}, {"ioprio", {2, 2, &Service::HandleIoprio}}, {"keycodes", {1, kMax, &Service::HandleKeycodes}}, {"oneshot", {0, 0, &Service::HandleOneshot}}, {"onrestart", {1, kMax, &Service::HandleOnrestart}}, {"seclabel", {1, 1, &Service::HandleSeclabel}}, {"setenv", {2, 2, &Service::HandleSetenv}}, {"socket", {3, 6, &Service::HandleSocket}},//处理socket关键字的函数为HandleSocket {"user", {1, 1, &Service::HandleUser}}, {"writepid", {1, kMax, &Service::HandleWritepid}}, }; return option_handlers; } /* 文件:system/core/init/service.cpp */ /* name type perm [ uid gid context ] */ bool Service::HandleSocket(const std::vector<std::string>& args, std::string* err) { //指定的socket传输信息方式只能为dgram、stream或seqpacket,这里指定为stream if (args[2] != "dgram" && args[2] != "stream" && args[2] != "seqpacket") { *err = "socket type must be 'dgram', 'stream' or 'seqpacket'"; return false; } //获取指定的读写权限,这里为600 int perm = std::stoul(args[3], 0, 8); //获取指定的owner和group,可选项,这里指定为system system uid_t uid = args.size() > 4 ? decode_uid(args[4].c_str()) : 0; gid_t gid = args.size() > 5 ? decode_uid(args[5].c_str()) : 0; //获取指定的selinux context,可选项,这里没有指定 std::string socketcon = args.size() > 6 ? args[6] : ""; //sockets_的定义std::vector<SocketInfo> sockets_ //将指定的信息整合成一个SocketInfo对象然后放入到vector中 sockets_.emplace_back(args[1], args[2], uid, gid, perm, socketcon); return true; }HandleSocket()函数中完成了对.rc文件中socket声明的信息的收集,接下来就是对信息的处理。
/* 文件:system/core/init/service.cpp */ bool Service::Start() { //...... for (const auto& si : sockets_) { //遍历sockets_ int socket_type = ((si.type == "stream" ? SOCK_STREAM : (si.type == "dgram" ? SOCK_DGRAM : SOCK_SEQPACKET))); const char* socketcon = !si.socketcon.empty() ? si.socketcon.c_str() : scon.c_str(); //第一步 int s = create_socket(si.name.c_str(), socket_type, si.perm, si.uid, si.gid, socketcon); if (s >= 0) { //第二步 PublishSocket(si.name, s); } } //...... }init中对每条socket声明做了以下两步处理: 1.create_socket 2.PublishSocket 这两步又具体做了什么呢?
/* 文件:system/core/init/util.cpp */ /* * create_socket - creates a Unix domain socket in ANDROID_SOCKET_DIR * ("/dev/socket") as dictated in init.rc. This socket is inherited by the * daemon. We communicate the file descriptor's value via the environment * variable ANDROID_SOCKET_ENV_PREFIX<name> ("ANDROID_SOCKET_foo"). */ int create_socket(const char *name, int type, mode_t perm, uid_t uid, gid_t gid, const char *socketcon) { struct sockaddr_un addr; int fd, ret, savederrno; char *filecon; //...... //调用socket函数创建用于本地IPC的socket,也就是注释中说的Unix域socket fd = socket(PF_UNIX, type, 0); if (fd < 0) { ERROR("Failed to open socket '%s': %s\n", name, strerror(errno)); return -1; } //...... memset(&addr, 0 , sizeof(addr)); addr.sun_family = AF_UNIX; //AF_UNIX表示要使用一个本地文件作为通信地址 snprintf(addr.sun_path, sizeof(addr.sun_path), ANDROID_SOCKET_DIR"/%s", name); //指明使用的本地文件为 /dev/socket/installd //...... //绑定创建socket的通信地址为 /dev/socket/installd ret = bind(fd, (struct sockaddr *) &addr, sizeof (addr)); //...... //根据声明修改owner和group ret = lchown(addr.sun_path, uid, gid); //根据声明修改权限 ret = fchmodat(AT_FDCWD, addr.sun_path, perm, AT_SYMLINK_NOFOLLOW); //...... return fd; //...... } /* 文件:system/core/init/service.cpp */ //PublishSocket函数比较好理解,就是根据声明的name=installd添加一个环境变量ANDROID_SOCKET_installd,其值为前面创建的socket的文件描述符 void Service::PublishSocket(const std::string& name, int fd) const { std::string key = StringPrintf(ANDROID_SOCKET_ENV_PREFIX "%s", name.c_str()); //key=ANDROID_SOCKET_installd std::string val = StringPrintf("%d", fd); //val=fd add_environment(key.c_str(), val.c_str()); //添加到环境变量 /* make sure we don't close-on-exec */ fcntl(fd, F_SETFD, 0); }init中对socket声明的处理分析完了,总结一下做了哪些事情 1.(调用socket函数)创建一个Unix域的socket,也就是用于本地IPC的socket,并根据socket声明中的name(调用bind函数)为其绑定/dev/socket/name文件作为通信地址 2.根据socket声明设置/dev/socket/name文件的owner、group以及权限等 3.添加一个名为ANDROID_SOCKET_name的环境变量,其值为前面创建的socket的文件描述符
然而init中只完成了socket和bind流程,接下来的listen和accept流程则由installd进程完成。installd进程的入口函数为main函数。
/* 文件:frameworks/native/cmds/installd/installd.cpp */ int main(const int argc, char *argv[]) { return android::installd::installd_main(argc, argv); } static int installd_main(const int argc ATTRIBUTE_UNUSED, char *argv[]) { char buf[BUFFER_MAX]; struct sockaddr addr; socklen_t alen; int lsocket, s; //...... /* android_get_control_socket函数定义在system/core/include/cutils/sockets.h中 这个函数的逻辑很简单,就是获取环境变量ANDROID_SOCKET_$SOCKET_PATH的值,也就是环境变量ANDROID_SOCKET_installd的值 显而易见,就是拿到init中创建的对应的socket的文件描述符 */ lsocket = android_get_control_socket(SOCKET_PATH); //SOCKET_PATH="installd" if (lsocket < 0) { ALOGE("Failed to get socket from environment: %s\n", strerror(errno)); exit(1); } if (listen(lsocket, 5)) { //调用listen函数监听是否有客户端连接请求 ALOGE("Listen on socket failed: %s\n", strerror(errno)); exit(1); } fcntl(lsocket, F_SETFD, FD_CLOEXEC); for (;;) { alen = sizeof(addr); s = accept(lsocket, &addr, &alen); //调用accept()函数接受和处理客户端的连接请求,返回一个新的socket用于和客户端通信 if (s < 0) { ALOGE("Accept failed: %s\n", strerror(errno)); continue; } fcntl(s, F_SETFD, FD_CLOEXEC); ALOGI("new connection\n"); for (;;) { //连接成功,准备读取来自客户端的消息并执行相应动作 unsigned short count; if (readx(s, &count, sizeof(count))) { ALOGE("failed to read size\n"); break; } if ((count < 1) || (count >= BUFFER_MAX)) { ALOGE("invalid size %d\n", count); break; } if (readx(s, buf, count)) { ALOGE("failed to read command\n"); break; } buf[count] = 0; if (selinux_enabled && selinux_status_updated() > 0) { selinux_android_seapp_context_reload(); } if (execute(s, buf)) break; } ALOGI("closing connection\n"); close(s); } return 0; }至此服务端的准备工作都完成了,就等客户端的连接了。
客户端流程以installd进程对应的framework层的客户端Installer服务的初始化为切入点。
/* 文件:frameworks/base/services/java/com/android/server/SystemServer.java */ private void startBootstrapServices() { // Wait for installd to finish starting up so that it has a chance to // create critical directories such as /data/user with the appropriate // permissions. We need this to complete before we initialize other services. Installer installer = mSystemServiceManager.startService(Installer.class); //...... } //startService方法会先调用Installer的构造方法,然后再调用它的onStart()方法 /* 文件:frameworks/base/services/core/java/com/android/server/pm/Installer.java */ //构造方法 public Installer(Context context) { super(context); mInstaller = new InstallerConnection(); } //onStart()方法 public void onStart() { Slog.i(TAG, "Waiting for installd to be ready."); mInstaller.waitForConnection(); }跟进waitForConnection()方法:
/* 文件:frameworks/base/core/java/com/android/internal/os/InstallerConnection.java */ //每隔一秒ping一次服务端,直到ping成功才退出 public void waitForConnection() { for (;;) { try { execute("ping"); return; } catch (InstallerException ignored) { } Slog.w(TAG, "installd not ready"); SystemClock.sleep(1000); } } public String[] execute(String cmd, Object... args) throws InstallerException { //...... final String[] resRaw = transact(builder.toString()).split(" "); //transact("ping") //...... } public synchronized String transact(String cmd) { //...... if (!connect()) { //先去连接 Slog.e(TAG, "connection failed"); return "-1"; } if (!writeCommand(cmd)) { //再往服务端发送"ping"消息 /* * If installd died and restarted in the background (unlikely but * possible) we'll fail on the next write (this one). Try to * reconnect and write the command one more time before giving up. */ Slog.e(TAG, "write command failed? reconnect!"); if (!connect() || !writeCommand(cmd)) { return "-1"; } } if (LOCAL_DEBUG) { Slog.i(TAG, "send: '" + cmd + "'"); } final int replyLength = readReply(); //读取服务端的返回 if (replyLength > 0) { String s = new String(buf, 0, replyLength); if (LOCAL_DEBUG) { Slog.i(TAG, "recv: '" + s + "'"); } return s; } else { if (LOCAL_DEBUG) { Slog.i(TAG, "fail"); } return "-1"; } }关键在于connect()函数
/* 文件:frameworks/base/core/java/com/android/internal/os/InstallerConnection.java */ private boolean connect() { if (mSocket != null) { return true; } Slog.i(TAG, "connecting..."); try { mSocket = new LocalSocket(); //后续分析点1 //指明socket的通信地址为/dev/socket/installd,Namespace.RESERVED是指向/dev/socket路径的命名空间 LocalSocketAddress address = new LocalSocketAddress("installd", LocalSocketAddress.Namespace.RESERVED); mSocket.connect(address); //后续分析点2 mIn = mSocket.getInputStream(); mOut = mSocket.getOutputStream(); } catch (IOException ex) { disconnect(); return false; } return true; } //分析点1:mSocket = new LocalSocket() /* 文件:frameworks/base/core/java/android/net/LocalSocket.java */ //这一句的最终调用如下: LocalSocket(LocalSocketImpl impl, int sockType) { this.impl = impl; // impl = new LocalSocketImpl() this.sockType = sockType; // sockType = SOCKET_STREAM this.isConnected = false; this.isBound = false; } //分析点2:mSocket.connect(address) /* 文件:frameworks/base/core/java/android/net/LocalSocket.java */ public void connect(LocalSocketAddress endpoint) throws IOException { synchronized (this) { if (isConnected) { throw new IOException("already connected"); } implCreateIfNeeded(); //分析点3:创建socket impl.connect(endpoint, 0); //分析点4:connect isConnected = true; isBound = true; } }看到客户端创建socket和进行连接的影子了,接下来分析具体是如何创建和连接的。 创建:
//分析点3:创建socket implCreateIfNeeded() /* 文件:frameworks/base/core/java/android/net/LocalSocket.java */ private void implCreateIfNeeded() throws IOException { if (!implCreated) { synchronized (this) { if (!implCreated) { try { impl.create(sockType); //sockType = SOCKET_STREAM } finally { implCreated = true; } } } } } /* 文件:frameworks/base/core/java/android/net/LocalSocketImpl.java */ public void create(int sockType) throws IOException { // no error if socket already created // need this for LocalServerSocket.accept() if (fd == null) { int osType; switch (sockType) { case LocalSocket.SOCKET_DGRAM: osType = OsConstants.SOCK_DGRAM; break; case LocalSocket.SOCKET_STREAM: osType = OsConstants.SOCK_STREAM; break; case LocalSocket.SOCKET_SEQPACKET: osType = OsConstants.SOCK_SEQPACKET; break; default: throw new IllegalStateException("unknown sockType"); } try { fd = Os.socket(OsConstants.AF_UNIX, osType, 0); //前面介绍过,AF_UNIX表示要使用一个本地文件作为通信地址,与服务端相对应 mFdCreatedInternally = true; } catch (ErrnoException e) { e.rethrowAsIOException(); } } } //fd = Os.socket(OsConstants.AF_UNIX, osType, 0)最终会调用到native层的Posix_socket函数 /* 文件:libcore/luni/src/main/native/libcore_io_Posix.cpp */ static jobject Posix_socket(JNIEnv* env, jobject, jint domain, jint type, jint protocol) { if (domain == AF_PACKET) { protocol = htons(protocol); // Packet sockets specify the protocol in host byte order. } //最终调用到socket函数了,接下来就需要调用connect函数进行连接了 int fd = throwIfMinusOne(env, "socket", TEMP_FAILURE_RETRY(socket(domain, type, protocol))); return fd != -1 ? jniCreateFileDescriptor(env, fd) : NULL; }连接:
//分析点4:impl.connect(endpoint, 0); /* 文件:frameworks/base/core/java/android/net/LocalSocketImpl.java */ protected void connect(LocalSocketAddress address, int timeout) throws IOException { if (fd == null) { throw new IOException("socket not created"); } //native方法,实现在android_net_LocalSocketImpl.cpp的socket_connect_local函数 connectLocal(fd, address.getName(), address.getNamespace().getId()); } /* frameworks/base/core/jni/android_net_LocalSocketImpl.cpp */ static void socket_connect_local(JNIEnv *env, jobject object, jobject fileDescriptor, jstring name, jint namespaceId) { //...... ret = socket_local_client_connect( fd, nameUtf8.c_str(), namespaceId, SOCK_STREAM); //...... } /* 文件:system/core/libcutils/socket_local_client_unix.c */ int socket_local_client_connect(int fd, const char *name, int namespaceId, int type UNUSED) { struct sockaddr_un addr; socklen_t alen; int err; //根据LocalSocketAddress构造native层用到的地址结构体sockaddr_un,对应installd进程初始化时构造的结构体sockaddr_un,后续分析 err = socket_make_sockaddr_un(name, namespaceId, &addr, &alen); if (err < 0) { goto error; } if(connect(fd, (struct sockaddr *) &addr, alen) < 0) { //调用到connect了! goto error; } return fd; error: return -1; } //分析下socket_make_sockaddr_un函数 /* 文件:system/core/libcutils/socket_local_client_unix.c */ int socket_make_sockaddr_un(const char *name, int namespaceId, struct sockaddr_un *p_addr, socklen_t *alen) { memset (p_addr, 0, sizeof (*p_addr)); size_t namelen; switch (namespaceId) { case ANDROID_SOCKET_NAMESPACE_ABSTRACT: //...... 
break; case ANDROID_SOCKET_NAMESPACE_RESERVED: //#define ANDROID_RESERVED_SOCKET_PREFIX "/dev/socket/" namelen = strlen(name) + strlen(ANDROID_RESERVED_SOCKET_PREFIX); /* unix_path_max appears to be missing on linux */ if (namelen > sizeof(*p_addr) - offsetof(struct sockaddr_un, sun_path) - 1) { goto error; } strcpy(p_addr->sun_path, ANDROID_RESERVED_SOCKET_PREFIX); strcat(p_addr->sun_path, name); //现在p_addr->sun_path="/dev/socket/installd" break; case ANDROID_SOCKET_NAMESPACE_FILESYSTEM: //...... break; default: // invalid namespace id return -1; } p_addr->sun_family = AF_LOCAL; //经过前面的处理后,地址结构体sockaddr_un里的参数不就和init进程里bind socket时传入的sockaddr_un的参数相同了吗? *alen = namelen + offsetof(struct sockaddr_un, sun_path) + 1; return 0; error: return -1; }至此客户端的创建和请求连接的过程也分析完了,请求连接成功后,客户端就可以和服务端通信了,例如前面提到的发送"ping"消息和获取"ping"消息的返回。
想要了解Android LocalSocket的工作机制,主要需要分析清楚以下两点:
1.客户端和服务端是如何基于Socket编程模型做好通信前的准备的;
2.framework客户端的LocalSocketAddress是如何和native服务端的struct sockaddr_un指向同一通信地址的。