多语言DNS解析逻辑排查

目录

  1. Golang
    1.1. fasthttp
  2. Python
  3. Java
  4. Node
  5. DotNet

简要排查一下不同语言下对应服务框架的DNS解析逻辑。以下讨论均基于linux系统。

前言:在linux系统下,DNS解析缓存一般是一个可选项,但是目前很多发行版都内置了nscd,这个服务会对DNS解析结果进行缓存。所以在排查DNS解析问题的时候,需要注意是否有nscd缓存的影响。见关闭Linux下的DNS缓存

Golang

Golang目前对于DNS的解析流程的实现位于 net.Resolver 下。go自身实现了一套语言层面的DNS解析器,同时也支持应用直接调用系统的DNS解析器。(纯go实现和cgo实现)


来自:https://zhuanlan.zhihu.com/p/54989059

这个顺序的计算依赖于平台类型、/etc/nsswitch.conf等配置。比如GOOS为darwin和android的平台都是用cgo解析,这可能是因为这两个平台上找不到/etc/hosts和/etc/resolv.conf文件吧。至于先查files还是先查dns,则由/etc/nsswitch.conf的配置决定。对于linux,默认都是使用purego去解析的。

如果你确定你机器上有/etc/hosts和/etc/resolv.conf这两个文件,而且格式正确,应用程序有访问权限,那么你可以设置Resolver.PreferGo=true,强制使用purego。


解析流程如下

1709016744306

在Resolver中,会先计算本次解析所使用的方式及其顺序,然后依次调用对应的解析器进行解析。如果计算出的结果是使用纯go解析,那么会调用goLookupIPCNAMEOrder函数,这个函数会根据系统中的/etc/hosts和/etc/resolv.conf中的配置解析DNS结果。如果计算出的结果是cgo,则会调用libc的getaddrinfo函数进行解析。

如果使用了某些第三方库,第三方库为了性能可能会使用自己的DNS解析器,这时候需要查看对应库的文档来确定具体解析方式。

fasthttp

可以在fasthttp库的 tcpdialer.go 中找到DNS缓存的设置。见 TCPDialer 结构体

// TCPDialer contains options to control a group of Dial calls.
//
// The DNS-related settings are Resolver, DisableDNSResolution and
// DNSCacheDuration.
type TCPDialer struct {
 // Concurrency controls the maximum number of concurrent Dials
 // that can be performed using this object.
 // Setting this to 0 means unlimited.
 //
 // WARNING: This can only be changed before the first Dial.
 // Changes made after the first Dial will not affect anything.
 Concurrency int

 // LocalAddr is the local address to use when dialing an
 // address.
 // If nil, a local address is automatically chosen.
 LocalAddr *net.TCPAddr

 // Resolver is the resolver used for DNS lookups.
 // This may be used to override DNS resolving policy, like this:
 // var dialer = &fasthttp.TCPDialer{
 //  Resolver: &net.Resolver{
 //   PreferGo:     true,
 //   StrictErrors: false,
 //   Dial: func (ctx context.Context, network, address string) (net.Conn, error) {
 //    d := net.Dialer{}
 //    return d.DialContext(ctx, "udp", "8.8.8.8:53")
 //   },
 //  },
 // }
 Resolver Resolver

 // DisableDNSResolution may be used to disable DNS resolution
 DisableDNSResolution bool
 // DNSCacheDuration may be used to override the default DNS cache duration (DefaultDNSCacheDuration)
 DNSCacheDuration time.Duration

 // NOTE(review): presumably the cache of resolved addresses that
 // DNSCacheDuration applies to - confirm against tcpdialer.go.
 tcpAddrsMap sync.Map

 // NOTE(review): presumably the semaphore that enforces Concurrency -
 // confirm against tcpdialer.go.
 concurrencyCh chan struct{}

 // NOTE(review): presumably guards one-time initialization on the first
 // Dial (see the WARNING on Concurrency) - confirm against tcpdialer.go.
 once sync.Once
}

Python

Python中自带的socket库在解析DNS时会进行对应的系统调用,在linux上的话,标准的cpython解释器中对这个方法的定义如下

(代码摘自cpython主分支)

/* Implementation of socket.gethostbyname().
 *
 * Parses one host name from args using the "idna" encoding, resolves it
 * for AF_INET only through setipaddr(), and returns the object built by
 * make_ipv4_addr() from the resulting sockaddr_in.
 *
 * Returns NULL when argument parsing fails, when the audit call returns a
 * negative value, or when setipaddr() returns a negative value.
 */
/*ARGSUSED*/
static PyObject *
socket_gethostbyname(PyObject *self, PyObject *args)
{
    char *name;
    struct sockaddr_in addrbuf;
    PyObject *ret = NULL;

    /* On parse failure nothing has to be released, so return directly. */
    if (!PyArg_ParseTuple(args, "et:gethostbyname", "idna", &name))
        return NULL;
    /* Raise the "socket.gethostbyname" audit event before resolving. */
    if (PySys_Audit("socket.gethostbyname", "O", args) < 0) {
        goto finally;
    }
    socket_state *state = get_module_state(self);
    /* The lookup is restricted to IPv4: AF_INET plus a sockaddr_in buffer. */
    int rc = setipaddr(state, name, (struct sockaddr *)&addrbuf,
                       sizeof(addrbuf), AF_INET);
    if (rc < 0) {
        goto finally;
    }
    ret = make_ipv4_addr(&addrbuf);
finally:
    /* Every path after a successful parse releases the name buffer here. */
    PyMem_Free(name);
    return ret;
}
setipaddr代码(有点长)。注意:下面摘录的setipaddr签名只有4个参数,与上面调用处传入的5个参数(多了一个state)不一致,两段代码应当摘自不同版本的源码,但不影响对解析流程的理解。
/* Convert a host name or numeric address string into a socket address.
 *
 *   name           host name, numeric address, "" (wildcard),
 *                  "255.255.255.255" or "<broadcast>"
 *   addr_ret       output buffer; its first sizeof(struct sockaddr) bytes
 *                  are zeroed before anything else happens
 *   addr_ret_size  size of the buffer addr_ret points to; at most this many
 *                  bytes of a getaddrinfo() result are copied
 *   af             requested address family (AF_INET, AF_INET6, AF_UNSPEC)
 *
 * Returns the length of the raw address: 4 for IPv4, 16 for IPv6, or
 * sizeof(sin->sin_addr) in the broadcast case. Returns -1 after reporting
 * the error through set_gaierror() or PyErr_SetString().
 *
 * The input is handled in four steps, in this order:
 *   1. empty name         -> wildcard address via getaddrinfo(NULL, "0")
 *   2. broadcast names    -> INADDR_BROADCAST, no lookup
 *   3. numeric addresses  -> inet_pton()/inet_addr(), no lookup
 *   4. everything else    -> getaddrinfo(name, ...), the real resolution
 */
static int
setipaddr(const char *name, struct sockaddr*addr_ret, size_t addr_ret_size, int af)
{
    struct addrinfo hints, *res;
    int error;

    memset((void *) addr_ret, '\0', sizeof(*addr_ret));
    /* Step 1: empty name -- request the wildcard (AI_PASSIVE) address. */
    if (name[0] == '\0') {
        int siz;  /* raw address length handed back to the caller */
        memset(&hints, 0, sizeof(hints));
        hints.ai_family = af;
        hints.ai_socktype = SOCK_DGRAM;         /* dummy */
        hints.ai_flags = AI_PASSIVE;
        Py_BEGIN_ALLOW_THREADS
        ACQUIRE_GETADDRINFO_LOCK
        error = getaddrinfo(NULL, "0", &hints, &res);
        Py_END_ALLOW_THREADS
        /* We assume that those thread-unsafe getaddrinfo() versions
         * *are* safe regarding their return value, ie. that a
         * subsequent call to getaddrinfo() does not destroy the
         * outcome of the first call. */
        RELEASE_GETADDRINFO_LOCK
        if (error) {
            set_gaierror(error);
            return -1;
        }
        switch (res->ai_family) {
        case AF_INET:
            siz = 4;
            break;

# ifdef ENABLE_IPV6

        case AF_INET6:
            siz = 16;
            break;

# endif

        default:
            freeaddrinfo(res);
            PyErr_SetString(PyExc_OSError,
                "unsupported address family");
            return -1;
        }
        /* More than one wildcard result is treated as an error. */
        if (res->ai_next) {
            freeaddrinfo(res);
            PyErr_SetString(PyExc_OSError,
                "wildcard resolved to multiple address");
            return -1;
        }
        /* Copy at most addr_ret_size bytes into the caller's buffer. */
        if (res->ai_addrlen < addr_ret_size)
            addr_ret_size = res->ai_addrlen;
        memcpy(addr_ret, res->ai_addr, addr_ret_size);
        freeaddrinfo(res);
        return siz;
    }
    /* Step 2: special-case broadcast - inet_addr() below can return
     * INADDR_NONE for this */
    if (strcmp(name, "255.255.255.255") == 0 ||
        strcmp(name, "<broadcast>") == 0) {
        struct sockaddr_in*sin;
        /* Broadcast is only accepted for AF_INET / AF_UNSPEC requests. */
        if (af != AF_INET && af != AF_UNSPEC) {
            PyErr_SetString(PyExc_OSError,
                "address family mismatched");
            return -1;
        }
        sin = (struct sockaddr_in *)addr_ret;
        memset((void*) sin, '\0', sizeof(*sin));
        sin->sin_family = AF_INET;

# ifdef HAVE_SOCKADDR_SA_LEN

        sin->sin_len = sizeof(*sin);

# endif

        sin->sin_addr.s_addr = INADDR_BROADCAST;
        return sizeof(sin->sin_addr);
    }

    /* Step 3: avoid a name resolution in case of numeric address */

# ifdef HAVE_INET_PTON

    /* check for an IPv4 address */
    if (af == AF_UNSPEC || af == AF_INET) {
        struct sockaddr_in *sin = (struct sockaddr_in*)addr_ret;
        memset(sin, 0, sizeof(*sin));
        if (inet_pton(AF_INET, name, &sin->sin_addr) > 0) {
            sin->sin_family = AF_INET;

# ifdef HAVE_SOCKADDR_SA_LEN

            sin->sin_len = sizeof(*sin);

# endif

            return 4;
        }
    }

# ifdef ENABLE_IPV6

    /* check for an IPv6 address - if the address contains a scope ID, we
     * fallback to getaddrinfo(), which can handle translation from interface
     * name to interface index */
    if ((af == AF_UNSPEC || af == AF_INET6) && !strchr(name, '%')) {
        struct sockaddr_in6 *sin = (struct sockaddr_in6*)addr_ret;
        memset(sin, 0, sizeof(*sin));
        if (inet_pton(AF_INET6, name, &sin->sin6_addr) > 0) {
            sin->sin6_family = AF_INET6;

# ifdef HAVE_SOCKADDR_SA_LEN

            sin->sin6_len = sizeof(*sin);

# endif

            return 16;
        }
    }

# endif /*ENABLE_IPV6*/

# else /*HAVE_INET_PTON*/

    /* check for an IPv4 address (inet_addr() variant used when inet_pton()
     * is not available) */
    if (af == AF_INET || af == AF_UNSPEC) {
        struct sockaddr_in *sin = (struct sockaddr_in*)addr_ret;
        memset(sin, 0, sizeof(*sin));
        if ((sin->sin_addr.s_addr = inet_addr(name)) != INADDR_NONE) {
            sin->sin_family = AF_INET;

# ifdef HAVE_SOCKADDR_SA_LEN

            sin->sin_len = sizeof(*sin);

# endif

            return 4;
        }
    }

# endif /*HAVE_INET_PTON*/

    /* Step 4: perform a name resolution. The getaddrinfo() call runs
     * between Py_BEGIN_ALLOW_THREADS and Py_END_ALLOW_THREADS and inside
     * the ACQUIRE/RELEASE_GETADDRINFO_LOCK pair. */
    memset(&hints, 0, sizeof(hints));
    hints.ai_family = af;
    Py_BEGIN_ALLOW_THREADS
    ACQUIRE_GETADDRINFO_LOCK
    error = getaddrinfo(name, NULL, &hints, &res);

# if defined(__digital__) && defined(__unix__)

    if (error == EAI_NONAME && af == AF_UNSPEC) {
        /* On Tru64 V5.1, numeric-to-addr conversion fails
         * if no address family is given. Assume IPv4 for now. */
        hints.ai_family = AF_INET;
        error = getaddrinfo(name, NULL, &hints, &res);
    }

# endif

    Py_END_ALLOW_THREADS
    RELEASE_GETADDRINFO_LOCK  /* see the comment in the wildcard branch above */
    if (error) {
        set_gaierror(error);
        return -1;
    }
    /* Only the first result is used; copy at most addr_ret_size bytes. */
    if (res->ai_addrlen < addr_ret_size)
        addr_ret_size = res->ai_addrlen;
    memcpy((char *) addr_ret, res->ai_addr, addr_ret_size);
    freeaddrinfo(res);
    /* The family of the copied address decides the returned length. */
    switch (addr_ret->sa_family) {
    case AF_INET:
        return 4;

# ifdef ENABLE_IPV6

    case AF_INET6:
        return 16;

# endif

    default:
        PyErr_SetString(PyExc_OSError, "unknown address family");
        return -1;
    }
}

在linux中,python的setipaddr也会执行libc中的getaddrinfo来获取DNS解析结果。

部分库出于性能考虑可能会对解析结果进行缓存,参考对应库的说明文档即可。

Java

Java作为一个在虚拟机中执行的编译型语言,DNS解析的逻辑略有不同。

Java本身实现了DNS解析缓存,而且有几个配置项可以根据应用需求单独配置。

以下参考自:从 UnknownHostException 错误来分析 Java 的 DNS 解析和缓存机制

具体的配置项包含:

networkaddress.cache.ttl
networkaddress.cache.negative.ttl

其中networkaddress.cache.ttl是指DNS解析结果的缓存时间,networkaddress.cache.negative.ttl是指DNS解析失败的结果的缓存时间。

设置方式:

// A value of "0" for both properties turns off caching of successful and
// failed lookups respectively.
java.security.Security.setProperty("networkaddress.cache.ttl", "0");
java.security.Security.setProperty("networkaddress.cache.negative.ttl", "0");

就可以禁用DNS解析缓存。

Node

Node.js中,HTTP请求会调用DNS查询模块,见下

// ~/.cache/typescript/5.3/node_modules/@types/node/http.d.ts
declare module "http" {
    import * as stream from "node:stream";
    import { URL } from "node:url";
    import { LookupOptions } from "node:dns";
    ....

在DNS模块中,对DNS解析方式的说明如下

// ~/.cache/typescript/5.3/node_modules/@types/node/dns.d.ts
    /**
     * Resolves a host name (e.g. `'nodejs.org'`) into the first found A (IPv4) or
     * AAAA (IPv6) record. All `option` properties are optional. If `options` is an
     * integer, then it must be `4` or `6` – if `options` is `0` or not provided, then
     * IPv4 and IPv6 addresses are both returned if found.
     *
     * With the `all` option set to `true`, the arguments for `callback` change to`(err, addresses)`, with `addresses` being an array of objects with the
     * properties `address` and `family`.
     *
     * On error, `err` is an `Error` object, where `err.code` is the error code.
     * Keep in mind that `err.code` will be set to `'ENOTFOUND'` not only when
     * the host name does not exist but also when the lookup fails in other ways
     * such as no available file descriptors.
     *
     * `dns.lookup()` does not necessarily have anything to do with the DNS protocol.
     * The implementation uses an operating system facility that can associate names
     * with addresses and vice versa. This implementation can have subtle but
     * important consequences on the behavior of any Node.js program. Please take some
     * time to consult the `Implementation considerations section` before using`dns.lookup()`.
     ...

所以在Node.js中发送HTTP(S)请求的时候,dns.lookup()会直接调用操作系统提供的域名解析功能,而不是自行实现DNS协议。这就和Python的方式差不多。

DotNet

DotNet在SDK内部实现了一个System.Net.Dns.GetAddrInfo方法。

参考:https://stackoverflow.com/questions/34351030/no-such-host-is-known-in-getaddrinfo-c-sharp-sockets

这是微软的SDK中对应的实现:

位于System/net/System/Net/DNS.cs

//
// Resolves name through TryGetAddrInfo and returns the resulting IPHostEntry.
// Throws a SocketException carrying the returned error code when the lookup
// does not report SocketError.Success.
//
private unsafe static IPHostEntry GetAddrInfo(string name) {
    IPHostEntry hostEntry;
    SocketError errorCode = TryGetAddrInfo(name, out hostEntry);
    if (errorCode != SocketError.Success) {
        throw new SocketException(errorCode);
    }
    return hostEntry;
}

//
// IPv6 Changes: Add getaddrinfo and getnameinfo methods.
//
// Convenience overload: forwards to the three-argument TryGetAddrInfo with
// AddressInfoHints.AI_CANONNAME and returns its SocketError unchanged.
//
private unsafe static SocketError TryGetAddrInfo(string name, out IPHostEntry hostinfo)
{
    // gets the resolved name
    return TryGetAddrInfo(name, AddressInfoHints.AI_CANONNAME, out hostinfo);
}

//
// Resolves name with the given hint flags and builds an IPHostEntry from the
// returned address list.
//
// When SafeFreeAddrInfo.GetAddrInfo fails, the error code is returned (no
// exception is thrown here) and hostinfo is a mostly blank entry:
// HostName = name, empty Aliases, empty AddressList.
//
// On success, SocketError.Success is returned and hostinfo holds the
// canonical name (falling back to name), every IPv4 address, and every IPv6
// address when Socket.OSSupportsIPv6 is true. Aliases is always empty.
//
// Under FEATURE_PAL the method only throws
// SocketException(SocketError.OperationNotSupported).
//
private unsafe static SocketError TryGetAddrInfo(string name, AddressInfoHints flags, out IPHostEntry hostinfo)
{
    //
    // Use SocketException here to show operation not supported
    // if, by some nefarious means, this method is called on an
    // unsupported platform.
    //
#if FEATURE_PAL
    throw new SocketException(SocketError.OperationNotSupported);
#else
    SafeFreeAddrInfo root = null;
    ArrayList addresses = new ArrayList();
    string canonicalname = null;

    AddressInfo hints = new AddressInfo();
    hints.ai_flags = flags; 
    hints.ai_family = AddressFamily.Unspecified;   // gets all address families
    //
    // Use try / finally so we always get a shot at freeaddrinfo
    //
    try {
        SocketError errorCode = (SocketError)SafeFreeAddrInfo.GetAddrInfo(name, null, ref hints, out root);
        if (errorCode != SocketError.Success) { // Should not throw, return mostly blank hostentry
            hostinfo = new IPHostEntry();
            hostinfo.HostName = name;
            hostinfo.Aliases = new string[0];
            hostinfo.AddressList = new IPAddress[0];
            return errorCode;
        }

        // Walk the result list through its ai_next links, starting at the
        // raw handle held by root.
        AddressInfo* pAddressInfo = (AddressInfo*)root.DangerousGetHandle();
        //
        // Process the results
        //
        while (pAddressInfo!=null) {
            SocketAddress sockaddr;
            //
            // Retrieve the canonical name for the host - only appears in the first AddressInfo
            // entry in the returned array.
            //
            if (canonicalname==null && pAddressInfo->ai_canonname!=null) {
                canonicalname = Marshal.PtrToStringUni((IntPtr)pAddressInfo->ai_canonname);
            }
            //
            // Only process IPv4 or IPv6 Addresses. Note that it's unlikely that we'll
            // ever get any other address families, but better to be safe than sorry.
            // We also filter based on whether IPv6 is supported on the current
            // platform / machine.
            //
            if ( ( pAddressInfo->ai_family == AddressFamily.InterNetwork ) || // Never filter v4
                (pAddressInfo->ai_family == AddressFamily.InterNetworkV6 && Socket.OSSupportsIPv6))
                
            {
                sockaddr = new SocketAddress(pAddressInfo->ai_family, pAddressInfo->ai_addrlen);
                //
                // Push address data into the socket address buffer
                //
                for (int d = 0; d < pAddressInfo->ai_addrlen; d++) {
                    sockaddr.m_Buffer[d] = *(pAddressInfo->ai_addr + d);
                }
                //
                // NOTE: We need an IPAddress now, the only way to create it from a
                //       SocketAddress is via IPEndPoint. This ought to be simpler.
                //
                if ( pAddressInfo->ai_family == AddressFamily.InterNetwork ) {
                    addresses.Add( ((IPEndPoint)IPEndPoint.Any.Create(sockaddr)).Address );
                }
                else {
                    addresses.Add( ((IPEndPoint)IPEndPoint.IPv6Any.Create(sockaddr)).Address );
                }
            }
            //
            // Next addressinfo entry
            //
            pAddressInfo = pAddressInfo->ai_next;
        }
    }
    finally {
        // Runs on the early-return error path as well as on success.
        if (root != null) {
            root.Close();
        }
    }

    //
    // Finally, put together the IPHostEntry
    //
    hostinfo = new IPHostEntry();

    hostinfo.HostName = canonicalname!=null ? canonicalname : name;
    hostinfo.Aliases = new string[0];
    hostinfo.AddressList = new IPAddress[addresses.Count];
    addresses.CopyTo(hostinfo.AddressList);

    return SocketError.Success;
#endif // FEATURE_PAL
}

其中DNS对应的系统调用位于System/net/System/Net/_SafeNetHandles.cs

//
// Thin wrapper: forwards all arguments unchanged to
// UnsafeNclNativeMethods.SafeNetHandlesXPOrLater.GetAddrInfoW and returns
// its integer result.
//
internal static int GetAddrInfo(string nodename, string servicename, ref AddressInfo hints, out SafeFreeAddrInfo outAddrInfo) {
    return UnsafeNclNativeMethods.SafeNetHandlesXPOrLater.GetAddrInfoW(nodename, servicename, ref hints, out outAddrInfo);
}

显然是直接调用对应操作系统提供的解析接口(这里是GetAddrInfoW)。考虑到dotNet是dotNetCore的升级,其中DNS解析逻辑应该一致。