我尝试在 Azure 机器学习的 Jupyter Notebook 中使用 databricks-sql-connector 从 Databricks SQL 仓库提取数据,但收到了下面的错误。我使用的脚本如下:
from databricks import sql
import os
# Open the SQL warehouse connection and its cursor as context managers so that
# both are closed even when execute() or fetchall() raises (e.g. on a network
# error); with trailing close() calls they would be skipped and leaked.
# NOTE(review): if result download fails because the workspace's blob storage
# host cannot be resolved from this compute, passing use_cloud_fetch=False to
# sql.connect() may avoid that download path -- confirm against the connector
# documentation for the installed version.
with sql.connect(
    server_hostname="adb-4285847157798999.17.azuredatabricks.net",
    http_path="/sql/1.0/warehouses/37667bb8d1fe23fa",
    access_token="<access-token>",
) as connection:
    with connection.cursor() as cursor:
        # Run a trivial query and print every returned row.
        cursor.execute("SELECT * from range(10)")
        print(cursor.fetchall())
以下是我收到的错误。请建议如何解决这个问题,因为我是新手。
gaierror Traceback (most recent call last)
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connection.py:199, in HTTPConnection._new_conn(self)
198 try:
--> 199 sock = connection.create_connection(
200 (self._dns_host, self.port),
201 self.timeout,
202 source_address=self.source_address,
203 socket_options=self.socket_options,
204 )
205 except socket.gaierror as e:
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/util/connection.py:60, in create_connection(address, timeout, source_address, socket_options)
58 raise LocationParseError(f"'{host}', label empty or too long") from None
---> 60 for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
61 af, socktype, proto, canonname, sa = res
File /anaconda/envs/nps/lib/python3.10/socket.py:955, in getaddrinfo(host, port, family, type, proto, flags)
954 addrlist = []
--> 955 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
956 af, socktype, proto, canonname, sa = res
gaierror: [Errno -2] Name or service not known
The above exception was the direct cause of the following exception:
NameResolutionError Traceback (most recent call last)
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:789, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
788 # Make the request on the HTTPConnection object
--> 789 response = self._make_request(
790 conn,
791 method,
792 url,
793 timeout=timeout_obj,
794 body=body,
795 headers=headers,
796 chunked=chunked,
797 retries=retries,
798 response_conn=response_conn,
799 preload_content=preload_content,
800 decode_content=decode_content,
801 **response_kw,
802 )
804 # Everything went great!
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:490, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
489 new_e = _wrap_proxy_error(new_e, conn.proxy.scheme)
--> 490 raise new_e
492 # conn.request() calls http.client.*.request, not the method in
493 # urllib3.request. It also calls makefile (recv) on the socket.
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:466, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
465 try:
--> 466 self._validate_conn(conn)
467 except (SocketTimeout, BaseSSLError) as e:
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:1095, in HTTPSConnectionPool._validate_conn(self, conn)
1094 if conn.is_closed:
-> 1095 conn.connect()
1097 # TODO revise this, see https://github.com/urllib3/urllib3/issues/2791
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connection.py:693, in HTTPSConnection.connect(self)
692 sock: socket.socket | ssl.SSLSocket
--> 693 self.sock = sock = self._new_conn()
694 server_hostname: str = self.host
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connection.py:206, in HTTPConnection._new_conn(self)
205 except socket.gaierror as e:
--> 206 raise NameResolutionError(self.host, self, e) from e
207 except SocketTimeout as e:
NameResolutionError: <urllib3.connection.HTTPSConnection object at 0x7f065ff85ea0>: Failed to resolve 'dbstoragej5fibm47ntgqa.blob.core.windows.net' ([Errno -2] Name or service not known)
The above exception was the direct cause of the following exception:
MaxRetryError Traceback (most recent call last)
File /anaconda/envs/nps/lib/python3.10/site-packages/requests/adapters.py:667, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
666 try:
--> 667 resp = conn.urlopen(
668 method=request.method,
669 url=url,
670 body=request.body,
671 headers=request.headers,
672 redirect=False,
673 assert_same_host=False,
674 preload_content=False,
675 decode_content=False,
676 retries=self.max_retries,
677 timeout=timeout,
678 chunked=chunked,
679 )
681 except (ProtocolError, OSError) as err:
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:873, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
870 log.warning(
871 "Retrying (%r) after connection broken by '%r': %s", retries, err, url
872 )
--> 873 return self.urlopen(
874 method,
875 url,
876 body,
877 headers,
878 retries,
879 redirect,
880 assert_same_host,
881 timeout=timeout,
882 pool_timeout=pool_timeout,
883 release_conn=release_conn,
884 chunked=chunked,
885 body_pos=body_pos,
886 preload_content=preload_content,
887 decode_content=decode_content,
888 **response_kw,
889 )
891 # Handle redirect?
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:873, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
870 log.warning(
871 "Retrying (%r) after connection broken by '%r': %s", retries, err, url
872 )
--> 873 return self.urlopen(
874 method,
875 url,
876 body,
877 headers,
878 retries,
879 redirect,
880 assert_same_host,
881 timeout=timeout,
882 pool_timeout=pool_timeout,
883 release_conn=release_conn,
884 chunked=chunked,
885 body_pos=body_pos,
886 preload_content=preload_content,
887 decode_content=decode_content,
888 **response_kw,
889 )
891 # Handle redirect?
[... skipping similar frames: HTTPConnectionPool.urlopen at line 873 (2 times)]
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:873, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
870 log.warning(
871 "Retrying (%r) after connection broken by '%r': %s", retries, err, url
872 )
--> 873 return self.urlopen(
874 method,
875 url,
876 body,
877 headers,
878 retries,
879 redirect,
880 assert_same_host,
881 timeout=timeout,
882 pool_timeout=pool_timeout,
883 release_conn=release_conn,
884 chunked=chunked,
885 body_pos=body_pos,
886 preload_content=preload_content,
887 decode_content=decode_content,
888 **response_kw,
889 )
891 # Handle redirect?
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/connectionpool.py:843, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
841 new_e = ProtocolError("Connection aborted.", new_e)
--> 843 retries = retries.increment(
844 method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
845 )
846 retries.sleep()
File /anaconda/envs/nps/lib/python3.10/site-packages/urllib3/util/retry.py:519, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
518 reason = error or ResponseError(cause)
--> 519 raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
521 log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)
MaxRetryError: HTTPSConnectionPool(host='dbstoragej5fibm47ntgqa.blob.core.windows.net', port=443): Max retries exceeded with url: /jobs/4285847157799137/sql/2024-12-19/14/results_2024-12-19T14:41:23Z_c0096a00-d0d4-450e-934f-f1ed607087ea?sig=vMkEoWgwiFZTQvCPve8bdNRwveQcepSL5GC8t8Biczc%3D&se=2024-12-19T14%3A56%3A24Z&sv=2019-02-02&spr=https&sp=r&sr=b (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7f065ff85ea0>: Failed to resolve 'dbstoragej5fibm47ntgqa.blob.core.windows.net' ([Errno -2] Name or service not known)"))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
File /anaconda/envs/nps/lib/python3.10/s
按照以下逐步过程来实现您的要求。
在 Azure ML 工作区中打开 Notebook 并安装 databricks 连接器,如下所示。
pip install databricks-sql-connector
安装后,重新启动内核一次以反映更新的软件包。
然后运行相同的代码;在我的环境中,它可以正常工作。
当 databricks-connect 的版本与 PySpark 的版本冲突时,可能会出现上述情况。您可以尝试以下故障排除步骤:先卸载当前的 PySpark,然后更换计算资源,并重新安装全部软件包。
参考: