Azure AI Document Intelligence
An Azure service that turns documents into usable data. Previously known as Azure Form Recognizer.
This browser is no longer supported.
Upgrade to Microsoft Edge to take advantage of the latest features, security updates, and technical support.
How to integrate or import the JSON output generated by Azure AI Document Intelligence into our existing Excel database?
Thanks.
Hi Gautami Kulkarni
You can draft your custom Json parsing logic based on the Json to extract the key field to panda frame. Post which, it can be converted to csv which should be importable to excel database.
Here is a sample python code that extract key fields and output two csv files.
import json
import pandas as pd
from pathlib import Path
# JSON string goes here (truncated for brevity in this snippet)
root = json.loads(json_str)
res = root["analyzeResult"]
def get_field_value(field):
if field is None:
return None
t = field.get("type")
if t == "string":
return field.get("valueString") or field.get("content")
if t == "number":
return field.get("valueNumber")
if t == "date":
return field.get("valueDate") or field.get("content")
if t == "currency":
cur = field.get("valueCurrency") or {}
return cur.get("amount") # numeric currency amount
if t == "address":
va = field.get("valueAddress") or {}
parts = [va.get("streetAddress"), va.get("city"), va.get("state"), va.get("postalCode")]
return ", ".join([p for p in parts if p])
if t == "array":
return field.get("valueArray")
return field.get("content")
invoice = res["documents"][0]
fields = invoice["fields"]
header = {
"VendorName": get_field_value(fields.get("VendorName")),
"InvoiceId": get_field_value(fields.get("InvoiceId")),
"InvoiceDate": get_field_value(fields.get("InvoiceDate")),
"DueDate": get_field_value(fields.get("DueDate")),
"CustomerName": get_field_value(fields.get("CustomerName")),
"CustomerId": get_field_value(fields.get("CustomerId")),
"BillingAddressRecipient": get_field_value(fields.get("BillingAddressRecipient")),
"BillingAddress": get_field_value(fields.get("BillingAddress")),
"ShippingAddressRecipient": get_field_value(fields.get("ShippingAddressRecipient")) if fields.get("ShippingAddressRecipient") else None,
"ShippingAddress": get_field_value(fields.get("ShippingAddress")) if fields.get("ShippingAddress") else None,
"ServiceAddressRecipient": get_field_value(fields.get("ServiceAddressRecipient")) if fields.get("ServiceAddressRecipient") else None,
"ServiceAddress": get_field_value(fields.get("ServiceAddress")) if fields.get("ServiceAddress") else None,
"PurchaseOrder": get_field_value(fields.get("PurchaseOrder")),
"SubTotal": get_field_value(fields.get("SubTotal")),
"TotalTax": get_field_value(fields.get("TotalTax")),
"InvoiceTotal": get_field_value(fields.get("InvoiceTotal")),
"PreviousUnpaidBalance": get_field_value(fields.get("PreviousUnpaidBalance")),
"AmountDue": get_field_value(fields.get("AmountDue")),
}
header_df = pd.DataFrame([header])
items = []
items_array = fields.get("Items", {}).get("valueArray", [])
for obj in items_array:
vo = obj.get("valueObject", {})
items.append({
"Date": get_field_value(vo.get("Date")),
"ProductCode": get_field_value(vo.get("ProductCode")),
"Description": get_field_value(vo.get("Description")),
"Quantity": get_field_value(vo.get("Quantity")),
"Unit": get_field_value(vo.get("Unit")) if vo.get("Unit") else None,
"UnitPrice": get_field_value(vo.get("UnitPrice")),
"Tax": get_field_value(vo.get("Tax")),
"Amount": get_field_value(vo.get("Amount")),
})
items_df = pd.DataFrame(items)
# Save
out_dir = Path('/mnt/data')
header_dfheader_df.to_csv(out_dir / 'invoice_header.csv', index=False)
Please don't forget to accept this answer if you found it useful.
Thank you.