read data from azure sql using a pyspark notebook

Jon Z 40 Reputation points
2024-06-18T09:50:50.85+00:00

Hi,

I am trying to read data from an "Azure SQL" database using a PySpark notebook, and it seems that something is missing. I enclose the error below. How could I add this data source?

Read data from SQL server table


Py4JJavaError Traceback (most recent call last)

Cell In [77], line 16

  9 #Read from SQL table using MS SQL Connector

 10 print("read data from SQL server table  ")

 11 jdbcDF = spark.read \

 12         .format("com.microsoft.sqlserver.jdbc.spark") \

 13         .option("url", url) \

 14         .option("dbtable", dbtable) \

 15         .option("user", user) \

---> 16 .option("password", password).load()

 18 jdbcDF.show(5)

File /opt/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py:184, in DataFrameReader.load(self, path, format, schema, **options)

182     return self._df(self._jreader.load(self._spark._sc._jvm.PythonUtils.toSeq(path)))

183 else:

--> 184 return self._df(self._jreader.load())

File ~/cluster-env/clonedenv/lib/python3.10/site-packages/py4j/java_gateway.py:1321, in JavaMember.__call__(self, *args)

1315 command = proto.CALL_COMMAND_NAME +\

1316 self.command_header +\

1317 args_command +\

1318 proto.END_COMMAND_PART

1320 answer = self.gateway_client.send_command(command)

-> 1321 return_value = get_return_value(

1322 answer, self.gateway_client, self.target_id, self.name)

1324 for temp_arg in temp_args:

1325 temp_arg._detach()

File /opt/spark/python/lib/pyspark.zip/pyspark/sql/utils.py:190, in capture_sql_exception.<locals>.deco(*a, **kw)

188 def deco(*a: Any, **kw: Any) -> Any:

189     try:

--> 190 return f(*a, **kw)

191     except Py4JJavaError as e:

192         converted = convert_exception(e.java_exception)

File ~/cluster-env/clonedenv/lib/python3.10/site-packages/py4j/protocol.py:326, in get_return_value(answer, gateway_client, target_id, name)

324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)

325 if answer[1] == REFERENCE_TYPE:

--> 326 raise Py4JJavaError(

327         "An error occurred while calling {0}{1}{2}.\n".

328         format(target_id, ".", name), value)

329 else:

330     raise Py4JError(

331         "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".

332         format(target_id, ".", name, value))

Py4JJavaError: An error occurred while calling o4302.load.

: java.lang.ClassNotFoundException:

Failed to find data source: com.microsoft.sqlserver.jdbc.spark. Please find packages at

https://spark.apache.org/third-party-projects.html

at org.apache.spark.sql.errors.QueryExecutionErrors$.failedToFindDataSourceError(QueryExecutionErrors.scala:587)
Azure Synapse Analytics
Azure Synapse Analytics
An Azure analytics service that brings together data integration, enterprise data warehousing, and big data analytics. Previously known as Azure SQL Data Warehouse.
4,621 questions
votes

1 answer

Sort by: Most helpful
  1. BhargavaGunnam-MSFT 28,616 Reputation points Microsoft Employee
    2024-06-18T18:03:16.2566667+00:00
    1 person found this answer helpful.
    0 comments No comments