Guides

Workflows

End-to-end recipes for the most common tasks, each with a runnable Python script (only the requests library is required). For the full per-endpoint reference see the API documentation; to run requests live, open the interactive Swagger UI. Conventions and known quirks are documented on the Conventions page.

Use-case recipes

Step-by-step guides for specific collection & discovery tasks — each opens a dedicated page with runnable sample code. See them all on the use-case recipes page.


0 · Shared setup

Every script below assumes these helpers and an authenticated key.

import requests, time

# Root URL that every request in this guide is built from. The request paths used
# below all start with "/", so leave off any trailing slash here.
BASE = "https://datatap-dev-api.azurewebsites.net"  # your environment's API base URL

# Obtain these from the trial page — it returns your tenant id, a tenant-scoped API key, and the
# provisioning task id. Paste them here:
TENANT_ID = "<your-tenant-id>"        # interpolated into the tenant-scoped /api/{TENANT_ID}/... paths
API_KEY   = "<your-tenant-api-key>"   # sent as the X-API-Key header

# Apollo serializes responses in PascalCase (e.g. "TenantId", "ApiKey"), so read keys as returned.
def post(path, key=None, json=None):
    """POST ``json`` to ``BASE`` + ``path`` and return the decoded JSON reply.

    The ``X-API-Key`` header is attached only when ``key`` is truthy. An HTTP
    error status raises through ``raise_for_status()``; a response with an
    empty body yields ``None`` instead of being parsed.
    """
    auth_headers = {}
    if key:
        auth_headers["X-API-Key"] = key

    response = requests.post(
        f"{BASE}{path}",
        headers=auth_headers,
        json=json,
        timeout=60,
    )
    response.raise_for_status()

    if not response.content:
        return None
    return response.json()

def get(path, key=None, params=None):
    """GET ``BASE`` + ``path`` with query ``params`` and return the decoded JSON reply.

    The ``X-API-Key`` header is attached only when ``key`` is truthy. An HTTP
    error status raises through ``raise_for_status()``; a response with an
    empty body yields ``None`` instead of being parsed.
    """
    auth_headers = {}
    if key:
        auth_headers["X-API-Key"] = key

    response = requests.get(
        f"{BASE}{path}",
        headers=auth_headers,
        params=params,
        timeout=60,
    )
    response.raise_for_status()

    if not response.content:
        return None
    return response.json()

Public · Onboarding & authentication

Get a tenant and credentials from the free-trial page: verify your email and it provisions a dedicated trial tenant, returning your Tenant ID, a tenant-scoped API key (sent as the X-API-Key header), and a provisioning task ID you can poll. Paste the Tenant ID and API key into the TENANT_ID / API_KEY constants in the setup block above — the scripts below use them. No API calls are needed for onboarding; the trial page handles it.

Public · Collect Microsoft 365 data

Pick a Microsoft 365 source and a blob destination, create a BACKUP job, start it, and track the async task. Set indexFiles: true so the data is searchable.

# Prerequisite: a Microsoft 365 source location and a blob destination location.
# List existing locations, or create them with POST /api/{tenant}/locations.
locations = get(f"/api/{TENANT_ID}/locations", key=API_KEY)


def _first_location_id(candidates, location_type):
    """Return the ``Id`` of the first location whose ``Type`` equals ``location_type``.

    ``candidates`` may be ``None`` (``get`` returns ``None`` for an empty response
    body); that is treated the same as an empty list.

    Raises:
        LookupError: no location of the requested type exists. This replaces the
            message-less ``StopIteration`` a bare ``next(...)`` would raise.
    """
    for location in candidates or []:
        if location["Type"] == location_type:
            return location["Id"]
    raise LookupError(
        f"no {location_type} location found; "
        f"create one with POST /api/{TENANT_ID}/locations first"
    )


SOURCE_ID = _first_location_id(locations, "EXCHANGE")  # M365 source
DEST_ID   = _first_location_id(locations, "BLOB")      # blob destination

# Describe the BACKUP job (source = M365, destination = BLOB), scheduled to run now.
backup_request = {
    "priority": "Medium",
    "name": "Nightly Exchange collection",
    "description": "Mailbox collection to blob",
    "type": "BACKUP",
    "schedule": {"type": "NOW"},  # or DAILY/WEEKLY with executionTime + daysOfWeek
    "sourceId": SOURCE_ID,
    "destinationId": DEST_ID,
    "indexFiles": True,  # index so the data is searchable afterwards
}

# Create the job, then keep its id for the start and status calls.
job = post(f"/api/{TENANT_ID}/jobs", key=API_KEY, json=backup_request)
JOB_ID = job["Id"]

# Starting the job returns the id of the async task to track.
task = post(f"/api/{TENANT_ID}/jobs/{JOB_ID}/start", key=API_KEY)
TASK_ID = task["TaskId"]

# Poll the async task every 10 seconds until it reaches a terminal state.
# Lifecycle: STARTING -> RUNNING -> READY (or FAILED).
task_done = False
while not task_done:
    st = get(f"/api/{TENANT_ID}/jobs/{JOB_ID}/status/{TASK_ID}", key=API_KEY)
    print("job state:", st["State"])
    task_done = st["State"] in ("READY", "FAILED")
    if not task_done:
        time.sleep(10)

# Inspect progress / history:
monitoring_query = {"jobTypes": "BACKUP"}
reporting_query = {"jobId": JOB_ID}
running  = get(f"/api/{TENANT_ID}/monitoring", key=API_KEY, params=monitoring_query)
finished = get(f"/api/{TENANT_ID}/reporting",  key=API_KEY, params=reporting_query)

Search is a premium feature — it requires a CollectionAndIndexing tenant (Azure AI Search). Combine a full-text query (subject/body) with an exact OData filter over the filterable fields. The use cases below map directly to the recipes in the premium reference.

# Search the collected / archived email. The index name is your tenant's search index.
INDEX = "dg-search-index"   # confirm via GET /api/{tenant}/indexes/{index}/fields


def search(query=None, filter=None, top=50, skip=0, orderby=None):
    """Query the tenant's search index and return the decoded response.

    Args:
        query: full-text expression matched over subject/body.
        filter: exact OData expression over the filterable fields.
        top: sent as the ``top`` query parameter.
        skip: sent as the ``skip`` query parameter.
        orderby: sort expression, sent as the ``$orderby`` query parameter.

    ``query``, ``filter`` and ``orderby`` are left out of the request when falsy.
    """
    optional = {"query": query, "filter": filter, "$orderby": orderby}
    params = {"top": top, "skip": skip}
    params.update({name: value for name, value in optional.items() if value})
    documents_path = f"/api/{TENANT_ID}/indexes/{INDEX}/documents"
    return get(documents_path, key=API_KEY, params=params)

# Use case A — full-text: invoices mentioning "overdue"
a = search(query="invoice AND overdue")

# Use case B — everything from a sender, with attachments, received in Q1 2024
sender_q1_filter = " and ".join([
    "fromAddress eq 'a.user@contoso.com'",
    "hasAttachments eq true",
    "receivedDateTime ge 2024-01-01T00:00:00Z",
    "receivedDateTime le 2024-03-31T23:59:59Z",
])
b = search(filter=sender_q1_filter)

# Use case C — mail addressed to someone (collection field → any), newest first
recipient_filter = "toRecipients/any(r: r/address eq 'b.user@contoso.com')"
c = search(filter=recipient_filter, orderby="receivedDateTime desc")

# Use case D — a whole conversation thread, oldest first
thread_filter = "conversationId eq '<conversationId>'"
d = search(filter=thread_filter, orderby="receivedDateTime asc")

# Use case E — a specific attachment by file name
attachment_filter = "attachments/any(x: x/name eq 'contract.pdf')"
e = search(filter=attachment_filter)

# Print one line per hit of use case B, followed by the reported match count.
for hit in b.get("Results", []):
    score, subject, sender = (
        hit.get(field) for field in ("score", "subject", "fromAddress")
    )
    print(score, subject, "from", sender)
total_matches = b.get("Count")
print("total matches:", total_matches)

Public · Restore

Restore a whole mailbox with a RESTORE job (source/destination reversed), or pull back a single message via Discovery without a full restore.

# Full restore: a RESTORE job mirrors a collection job with the two ends swapped —
# it reads from the BLOB collection and writes into the matching Microsoft 365 location.
restore_request = {
    "priority": "High",
    "name": "Restore mailbox",
    "description": "Restore from blob collection",
    "type": "RESTORE",
    "schedule": {"type": "NOW"},
    "sourceId": DEST_ID,         # the blob collection location
    "destinationId": SOURCE_ID,  # the M365 location to restore into
}
restore = post(f"/api/{TENANT_ID}/jobs", key=API_KEY, json=restore_request)

# Start the job that was just created.
restore_job_id = restore["Id"]
post(f"/api/{TENANT_ID}/jobs/{restore_job_id}/start", key=API_KEY)

# Single-item retrieval (no full restore): browse with Discovery, then download one message as EML.
# Listing individual messages requires a date range; each result carries a FragmentId.

# NOTE(review): LOCATION_ID and NODE_ID are used in the Discovery paths below but are not
# assigned anywhere else in this guide, so the snippet would stop with a NameError.
# Presumably they are the location to browse and the node (folder) whose messages are
# listed — confirm against the Discovery reference, then fill them in.
LOCATION_ID = "<location-id>"
NODE_ID = "<node-id>"

items = get(
    f"/api/{TENANT_ID}/discovery/{LOCATION_ID}/source/{SOURCE_ID}/items/{NODE_ID}",
    key=API_KEY,
    params={"types": "MessageItem", "fromDate": "2024-01-01", "toDate": "2024-03-31", "PageSize": 50}
)

# get() returns None for an empty body and the listing may be empty; fail with a clear
# message instead of an opaque TypeError / IndexError.
messages = (items or {}).get("Items") or []
if not messages:
    raise LookupError("Discovery returned no messages for the requested node and date range")
item = messages[0]

# The shared get() helper decodes JSON, so the raw EML bytes are fetched with requests directly.
eml = requests.get(
    f"{BASE}/api/{TENANT_ID}/discovery/{LOCATION_ID}/source/{SOURCE_ID}/items/{item['ItemId']}/download",
    headers={"X-API-Key": API_KEY},
    params={"dataType": "email", "fragmentId": item["FragmentId"], "format": "eml"},
    timeout=120
)
eml.raise_for_status()  # otherwise an HTTP error body would be saved as message.eml

# The context manager closes the file even if the write fails.
with open("message.eml", "wb") as eml_file:
    eml_file.write(eml.content)

Admin · Keys, consent & version rollout

Administrative tasks require a full-scope (admin) key. These endpoints are hidden from tenant keys and from the public API document.

# These calls need a FULL-scope (admin) key; a tenant key is not sufficient.
ADMIN_KEY = "..."   # keyId:secret of an admin key

# Issue a tenant-scoped key for a customer tenant.
key_request = {"scope": "Tenant", "tenantId": TENANT_ID}
issued = post("/api/authentication/generateApiKey", key=ADMIN_KEY, json=key_request)
print(issued["ApiKey"], "expires", issued["ExpirationDate"])

# Onboard a Microsoft 365 tenant — generate the admin-consent URL and send the customer there.
consent = get("/api/auth/consent-url", key=ADMIN_KEY)
consent_url = consent["ConsentUrl"]
print("Send the tenant admin to:", consent_url)

# Version rollout — pin specific tenants to a container build.
# (Only the per-tenant pin is shown here; the global promotion step is not.)
rollout_request = {
    "tenantIds": [TENANT_ID],
    "newContainerVersion": {"tag": "v2.2.0", "digest": "sha256:..."},
}
post("/api/config/tenants", key=ADMIN_KEY, json=rollout_request)