从 Azure MLS Jupyter 笔记本连接到 Databricks SQL 仓库时出现 NameResolutionError([Errno -2] 名称或服务未知)

问题描述 投票:0回答:1

我尝试使用 Databricks-SQL-Connector 通过 Azure 机器学习笔记本从 Databricks SQL 仓库中提取数据,并在 Jupyter Notebook 中收到以下错误。我正在使用以下脚本:

from databricks import sql
import os

# Run a trivial query against a Databricks SQL warehouse and print the rows.
# The connection and the cursor are opened as context managers so that both
# are closed even if execute() or fetchall() raises; with explicit close()
# calls at the end of the script, any exception would skip the cleanup.
with sql.connect(
    server_hostname="adb-4285847157798999.17.azuredatabricks.net",
    http_path="/sql/1.0/warehouses/37667bb8d1fe23fa",
    access_token="<access-token>",
) as connection:
    with connection.cursor() as cursor:
        cursor.execute("SELECT * from range(10)")
        print(cursor.fetchall())

以下是我收到的错误。请建议如何解决这个问题,因为我是新手。

gaierror                                  Traceback (most recent call last)
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connection.py:199, in HTTPConnection._new_conn(self)
    198 try:
--> 199     sock = connection.create_connection(
    200         (self._dns_host, self.port),
    201         self.timeout,
    202         source_address=self.source_address,
    203         socket_options=self.socket_options,
    204     )
    205 except socket.gaierror as e:

File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/util/connection.py:60, in create_connection(address, timeout, source_address, socket_options)
     58     raise LocationParseError(f"'{host}', label empty or too long") from None
---> 60 for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
     61     af, socktype, proto, canonname, sa = res

File /anaconda/envs/nps/lib/python3.10/socket.py:955, in getaddrinfo(host, port, family, type, proto, flags)
    954 addrlist = []
--> 955 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
    956     af, socktype, proto, canonname, sa = res

gaierror: [Errno -2] Name or service not known

The above exception was the direct cause of the following exception:

NameResolutionError                       Traceback (most recent call last)
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:789, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
    788 # Make the request on the HTTPConnection object
--> 789 response = self._make_request(
    790     conn,
    791     method,
    792     url,
    793     timeout=timeout_obj,
    794     body=body,
    795     headers=headers,
    796     chunked=chunked,
    797     retries=retries,
    798     response_conn=response_conn,
    799     preload_content=preload_content,
    800     decode_content=decode_content,
    801     **response_kw,
    802 )
    804 # Everything went great!

File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:490, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
    489         new_e = _wrap_proxy_error(new_e, conn.proxy.scheme)
--> 490     raise new_e
    492 # conn.request() calls http.client.*.request, not the method in
    493 # urllib3.request. It also calls makefile (recv) on the socket.

File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:466, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
    465 try:
--> 466     self._validate_conn(conn)
    467 except (SocketTimeout, BaseSSLError) as e:

File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:1095, in HTTPSConnectionPool._validate_conn(self, conn)
   1094 if conn.is_closed:
-> 1095     conn.connect()
   1097 # TODO revise this, see https://github.com/urllib3/urllib3/issues/2791

File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connection.py:693, in HTTPSConnection.connect(self)
    692 sock: socket.socket | ssl.SSLSocket
--> 693 self.sock = sock = self._new_conn()
    694 server_hostname: str = self.host

File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connection.py:206, in HTTPConnection._new_conn(self)
    205 except socket.gaierror as e:
--> 206     raise NameResolutionError(self.host, self, e) from e
    207 except SocketTimeout as e:

NameResolutionError: <urllib3.connection.HTTPSConnection object at 0x7f065ff85ea0>: Failed to resolve 'dbstoragej5fibm47ntgqa.blob.core.windows.net' ([Errno -2] Name or service not known)

The above exception was the direct cause of the following exception:

MaxRetryError                             Traceback (most recent call last)
File /anaconda/envs/nps/lib/python3.10/site-packages/requests/adapters.py:667, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    666 try:
--> 667     resp = conn.urlopen(
    668         method=request.method,
    669         url=url,
    670         body=request.body,
    671         headers=request.headers,
    672         redirect=False,
    673         assert_same_host=False,
    674         preload_content=False,
    675         decode_content=False,
    676         retries=self.max_retries,
    677         timeout=timeout,
    678         chunked=chunked,
    679     )
    681 except (ProtocolError, OSError) as err:

File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:873, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
    870     log.warning(
    871         "Retrying (%r) after connection broken by '%r': %s", retries, err, url
    872     )
--> 873     return self.urlopen(
    874         method,
    875         url,
    876         body,
    877         headers,
    878         retries,
    879         redirect,
    880         assert_same_host,
    881         timeout=timeout,
    882         pool_timeout=pool_timeout,
    883         release_conn=release_conn,
    884         chunked=chunked,
    885         body_pos=body_pos,
    886         preload_content=preload_content,
    887         decode_content=decode_content,
    888         **response_kw,
    889     )
    891 # Handle redirect?

File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:873, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
    870     log.warning(
    871         "Retrying (%r) after connection broken by '%r': %s", retries, err, url
    872     )
--> 873     return self.urlopen(
    874         method,
    875         url,
    876         body,
    877         headers,
    878         retries,
    879         redirect,
    880         assert_same_host,
    881         timeout=timeout,
    882         pool_timeout=pool_timeout,
    883         release_conn=release_conn,
    884         chunked=chunked,
    885         body_pos=body_pos,
    886         preload_content=preload_content,
    887         decode_content=decode_content,
    888         **response_kw,
    889     )
    891 # Handle redirect?

    [... skipping similar frames: HTTPConnectionPool.urlopen at line 873 (2 times)]

File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:873, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
    870     log.warning(
    871         "Retrying (%r) after connection broken by '%r': %s", retries, err, url
    872     )
--> 873     return self.urlopen(
    874         method,
    875         url,
    876         body,
    877         headers,
    878         retries,
    879         redirect,
    880         assert_same_host,
    881         timeout=timeout,
    882         pool_timeout=pool_timeout,
    883         release_conn=release_conn,
    884         chunked=chunked,
    885         body_pos=body_pos,
    886         preload_content=preload_content,
    887         decode_content=decode_content,
    888         **response_kw,
    889     )
    891 # Handle redirect?

File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:843, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
    841     new_e = ProtocolError("Connection aborted.", new_e)
--> 843 retries = retries.increment(
    844     method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
    845 )
    846 retries.sleep()

File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/util/retry.py:519, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
    518     reason = error or ResponseError(cause)
--> 519     raise MaxRetryError(_pool, url, reason) from reason  # type: ignore[arg-type]
    521 log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)

MaxRetryError: HTTPSConnectionPool(host='dbstoragej5fibm47ntgqa.blob.core.windows.net', port=443): Max retries exceeded with url: /jobs/4285847157799137/sql/2024-12-19/14/results_2024-12-19T14:41:23Z_c0096a00-d0d4-450e-934f-f1ed607087ea?sig=vMkEoWgwiFZTQvCPve8bdNRwveQcepSL5GC8t8Biczc%3D&se=2024-12-19T14%3A56%3A24Z&sv=2019-02-02&spr=https&sp=r&sr=b (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7f065ff85ea0>: Failed to resolve 'dbstoragej5fibm47ntgqa.blob.core.windows.net' ([Errno -2] Name or service not known)"))

During handling of the above exception, another exception occurred:

ConnectionError                           Traceback (most recent call last)
File /anaconda/envs/nps/lib/python3.10/s
python databricks azure-machine-learning-service urllib3 databricks-sql
1个回答
0
投票

按照以下逐步过程来实现您的要求。

在 Azure ML 工作区中打开 Notebook 并安装 databricks 连接器,如下所示。

pip install databricks-sql-connector

安装后,重新启动内核一次以反映更新的软件包。

然后运行相同的代码;在我的环境中,它可以正常工作。

enter image description here

当 databricks-connect 版本与 PySpark 版本冲突时,可能会出现上述情况。您可以尝试一些故障排除步骤,例如先卸载当前的 PySpark,然后更换计算实例并重新安装所有相关软件包。

参考:

# 针对 Python 的 Databricks Connect 故障排除

© www.soinside.com 2019 - 2024. All rights reserved.