Documentation Specification SDKs

Bootstrapping a Container

Creating a new .omnidata container is a multi-step process: create the directory structure, write the manifest, bootstrap both SQLite databases, initialize the blob store, create the adapter registry, and start the ingress log.

Step 1: Create the directory

# -p creates the container directory and its blobs/ subdirectory in one call
# and does not fail if either already exists.
mkdir -p my-instance.omnidata/blobs

Step 2: Write manifest.json

# Write the manifest. The quoted 'EOF' delimiter stops the shell from expanding
# anything inside the JSON, so the text below is written verbatim.
cat > my-instance.omnidata/manifest.json << 'EOF'
{
  "schema_version": 2,
  "instance_id": "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d",
  "instance_name": "my-instance",
  "owner_identity": "owner@example.com",
  "hat_identifier": "my-hat",
  "description": "Knowledge container for my-hat role",
  "created_at": "2026-04-01T12:00:00Z",
  "updated_at": "2026-04-01T12:00:00Z",
  "metadata": {}
}
EOF

The instance_id must be a UUID v4, generated fresh for each new instance. It is the permanent, immutable identity of this container.

Step 3: Bootstrap index.db

sqlite3 my-instance.omnidata/index.db << 'INDEXSQL'
-- journal_mode = WAL is recorded in the database file and stays in effect for
-- later connections. foreign_keys and cache_size only affect the connection
-- that sets them, so every connection that opens index.db afterwards has to
-- issue them again.
PRAGMA journal_mode = WAL;
PRAGMA foreign_keys = ON;
-- A negative cache_size is a memory budget in KiB (about 64000 KiB here),
-- not a number of pages.
PRAGMA cache_size = -64000;

-- omnidata_resources: one row per resource, identified by a text id and a
-- unique uri. New rows start with pipeline_state 'discovered'.
-- created_at / updated_at default to the current UTC time as ISO-8601 text;
-- nothing in this script refreshes updated_at, so writers must set it.
-- deleted_at stays NULL until a writer sets it.
-- NOTE(review): metadata presumably holds a JSON object ('{}' by default) — confirm.
CREATE TABLE IF NOT EXISTS omnidata_resources (
    id              TEXT PRIMARY KEY,
    uri             TEXT NOT NULL UNIQUE,
    adapter_name    TEXT NOT NULL,
    content_type    TEXT,
    title           TEXT,
    content_hash    TEXT,
    content_length  INTEGER,
    resource_at     TEXT,
    pipeline_state  TEXT NOT NULL DEFAULT 'discovered',
    metadata        TEXT DEFAULT '{}',
    created_at      TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    updated_at      TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    deleted_at      TEXT
);

-- omnidata_chunks: pieces of text belonging to a resource. Each row references
-- its parent in omnidata_resources and carries a chunk_index; the content
-- column is what fts_chunks indexes for full-text search.
-- NOTE(review): embedding is presumably the vector produced by embedding_model;
-- its binary encoding is not defined here — confirm.
CREATE TABLE IF NOT EXISTS omnidata_chunks (
    id              TEXT PRIMARY KEY,
    resource_id     TEXT NOT NULL REFERENCES omnidata_resources(id),
    chunk_index     INTEGER NOT NULL,
    content         TEXT NOT NULL,
    embedding       BLOB,
    embedding_model TEXT,
    metadata        TEXT DEFAULT '{}',
    created_at      TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    deleted_at      TEXT
);

-- fts_chunks: FTS5 external-content index over omnidata_chunks.content. The
-- text is not duplicated here; FTS5 reads it back from omnidata_chunks by
-- rowid, and the index only stays correct if every change to the content
-- table is mirrored into it (see the sync triggers later in this script).
-- NOTE(review): omnidata_chunks has a TEXT primary key, so content_rowid maps
-- to the implicit rowid, which VACUUM is allowed to renumber. After any VACUUM
-- of index.db run
--   INSERT INTO fts_chunks(fts_chunks) VALUES ('rebuild');
-- or the index may point at the wrong rows.
CREATE VIRTUAL TABLE IF NOT EXISTS fts_chunks USING fts5(
    content,
    content=omnidata_chunks,
    content_rowid=rowid
);

-- omnidata_queue: work items attached to a resource. New rows start as
-- 'pending' with priority 0, zero attempts and a ceiling of 3 attempts.
-- NOTE(review): the allowed task_type and status values are not constrained
-- by the schema — confirm the vocabulary against the runtime.
CREATE TABLE IF NOT EXISTS omnidata_queue (
    id              TEXT PRIMARY KEY,
    resource_id     TEXT NOT NULL REFERENCES omnidata_resources(id),
    task_type       TEXT NOT NULL,
    priority        INTEGER NOT NULL DEFAULT 0,
    status          TEXT NOT NULL DEFAULT 'pending',
    attempts        INTEGER NOT NULL DEFAULT 0,
    max_attempts    INTEGER NOT NULL DEFAULT 3,
    metadata        TEXT DEFAULT '{}',
    created_at      TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    updated_at      TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    deleted_at      TEXT
);

-- omnidata_kv: text key/value pairs, one row per key. Unlike the other tables
-- in index.db it has no metadata or deleted_at column.
CREATE TABLE IF NOT EXISTS omnidata_kv (
    key             TEXT PRIMARY KEY,
    value           TEXT NOT NULL,
    created_at      TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    updated_at      TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))
);

-- Indexes for index.db
-- Indexes declared with WHERE deleted_at IS NULL are partial: they contain
-- only rows whose deleted_at is NULL, so a query has to repeat that condition
-- for the planner to use them.
-- NOTE(review): uri already gets an implicit index from its UNIQUE constraint,
-- so idx_resources_uri looks redundant — confirm before relying on or
-- removing it.
CREATE INDEX IF NOT EXISTS idx_resources_uri ON omnidata_resources(uri);
CREATE INDEX IF NOT EXISTS idx_resources_pipeline ON omnidata_resources(pipeline_state) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_resources_content_hash ON omnidata_resources(content_hash);
CREATE INDEX IF NOT EXISTS idx_resources_adapter ON omnidata_resources(adapter_name) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_chunks_resource ON omnidata_chunks(resource_id, chunk_index);
CREATE INDEX IF NOT EXISTS idx_queue_status ON omnidata_queue(status, priority DESC) WHERE deleted_at IS NULL;

-- FTS5 sync triggers
-- An external-content FTS5 table is not updated automatically, so every
-- change to omnidata_chunks is mirrored into fts_chunks by hand.
-- Removing an entry uses the special 'delete' command row, which must be given
-- the OLD rowid and content exactly as they were indexed.

-- Insert: index the new chunk's content under its rowid.
CREATE TRIGGER IF NOT EXISTS fts_chunks_ai AFTER INSERT ON omnidata_chunks BEGIN
    INSERT INTO fts_chunks(rowid, content) VALUES (new.rowid, new.content);
END;

-- Delete: drop the removed chunk's entry from the index.
CREATE TRIGGER IF NOT EXISTS fts_chunks_ad AFTER DELETE ON omnidata_chunks BEGIN
    INSERT INTO fts_chunks(fts_chunks, rowid, content) VALUES ('delete', old.rowid, old.content);
END;

-- Update: fires only when the UPDATE assigns the content column; the old
-- entry is removed and the new text is indexed in its place.
CREATE TRIGGER IF NOT EXISTS fts_chunks_au AFTER UPDATE OF content ON omnidata_chunks BEGIN
    INSERT INTO fts_chunks(fts_chunks, rowid, content) VALUES ('delete', old.rowid, old.content);
    INSERT INTO fts_chunks(rowid, content) VALUES (new.rowid, new.content);
END;
INDEXSQL

Step 4: Bootstrap memory.db

sqlite3 my-instance.omnidata/memory.db << 'MEMORYSQL'
-- journal_mode = WAL is recorded in the database file and stays in effect for
-- later connections. foreign_keys and cache_size only affect the connection
-- that sets them, so every connection that opens memory.db afterwards has to
-- issue them again.
PRAGMA journal_mode = WAL;
PRAGMA foreign_keys = ON;
-- A negative cache_size is a memory budget in KiB (about 64000 KiB here),
-- not a number of pages.
PRAGMA cache_size = -64000;

-- omnidata_collections: named groupings. parent_id is a nullable
-- self-reference to another collection's id, which allows collections to nest.
-- The self-reference is only enforced on connections that have turned
-- foreign_keys on.
CREATE TABLE IF NOT EXISTS omnidata_collections (
    id          TEXT PRIMARY KEY,
    name        TEXT NOT NULL,
    parent_id   TEXT REFERENCES omnidata_collections(id),
    description TEXT,
    metadata    TEXT DEFAULT '{}',
    created_at  TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    updated_at  TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    deleted_at  TEXT
);

-- omnidata_edges: typed links from source_id to target_id.
-- NOTE(review): neither endpoint carries a foreign key, so the schema does not
-- check that the ids exist — confirm which kinds of ids are allowed here.
CREATE TABLE IF NOT EXISTS omnidata_edges (
    id          TEXT PRIMARY KEY,
    source_id   TEXT NOT NULL,
    target_id   TEXT NOT NULL,
    edge_type   TEXT NOT NULL,
    metadata    TEXT DEFAULT '{}',
    created_at  TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    deleted_at  TEXT
);

-- omnidata_tags: one row per tag attached to a resource_id.
-- NOTE(review): resource_id presumably refers to omnidata_resources.id in
-- index.db; SQLite foreign keys cannot span database files, so it is not
-- enforced here — confirm. The schema also does not prevent duplicate
-- (resource_id, tag) rows.
CREATE TABLE IF NOT EXISTS omnidata_tags (
    id          TEXT PRIMARY KEY,
    resource_id TEXT NOT NULL,
    tag         TEXT NOT NULL,
    created_at  TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    deleted_at  TEXT
);

-- omnidata_memory: typed memory records with optional subject / predicate /
-- object text and a confidence that defaults to 1.0.
-- NOTE(review): source_id is an unconstrained text reference — confirm what it
-- points at. The valid range of confidence is not enforced by the schema.
CREATE TABLE IF NOT EXISTS omnidata_memory (
    id              TEXT PRIMARY KEY,
    memory_type     TEXT NOT NULL,
    subject         TEXT,
    predicate       TEXT,
    object          TEXT,
    confidence      REAL DEFAULT 1.0,
    source_id       TEXT,
    metadata        TEXT DEFAULT '{}',
    created_at      TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    updated_at      TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    deleted_at      TEXT
);

-- Indexes for memory.db
-- Every index here is partial (WHERE deleted_at IS NULL): it contains only
-- rows whose deleted_at is NULL, so a query has to repeat that condition for
-- the planner to use it.
CREATE INDEX IF NOT EXISTS idx_collections_parent ON omnidata_collections(parent_id) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_edges_source ON omnidata_edges(source_id, edge_type) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_edges_target ON omnidata_edges(target_id, edge_type) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_tags_resource ON omnidata_tags(resource_id) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_tags_tag ON omnidata_tags(tag) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_memory_type ON omnidata_memory(memory_type) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_memory_subject ON omnidata_memory(subject) WHERE deleted_at IS NULL;
MEMORYSQL

Step 5: Create adapters.json

# Write an adapter registry that lists no adapters; each argument becomes one
# line of the file.
printf '%s\n' \
  '{' \
  '  "adapters": []' \
  '}' \
  > my-instance.omnidata/adapters.json

An empty adapter array is valid. Adapters are registered when the runtime configures them for the first time.

Step 6: Initialize ingress.log

# Creates ingress.log if it is missing; an existing log keeps its contents.
touch my-instance.omnidata/ingress.log

An empty ingress log is valid. Lines are appended as resources are ingested.

Complete script

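Here is the full bootstrap as a single shell script. It reads the SQL from Steps 3 and 4 from two files, index-bootstrap.sql and memory-bootstrap.sql, which must be in the directory you run it from: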

#!/bin/bash
# bootstrap.sh — create a new, empty <instance-name>.omnidata container:
# blobs/, manifest.json, index.db, memory.db, adapters.json and ingress.log.
#
# Usage: bootstrap.sh <instance-name>
#
# Reads index-bootstrap.sql and memory-bootstrap.sql from the current working
# directory (presumably the SQL shown in Steps 3 and 4 — confirm).
# Exits non-zero without creating anything if either file is unreadable or if
# the target container already has a manifest.
set -euo pipefail

NAME="${1:?Usage: bootstrap.sh <instance-name>}"
DIR="${NAME}.omnidata"

# Check the schema files first so a missing file cannot leave a half-built
# container behind.
for sql_file in index-bootstrap.sql memory-bootstrap.sql; do
  if [[ ! -r "${sql_file}" ]]; then
    echo "bootstrap.sh: cannot read ${sql_file} in $(pwd)" >&2
    exit 1
  fi
done

# instance_id is the permanent identity of a container, so never regenerate
# the manifest of one that already exists.
if [[ -e "${DIR}/manifest.json" ]]; then
  echo "bootstrap.sh: ${DIR}/manifest.json already exists; refusing to overwrite it" >&2
  exit 1
fi

# uuidgen prints upper case on some platforms; normalise to lower case.
INSTANCE_ID=$(uuidgen | tr '[:upper:]' '[:lower:]')
NOW=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

# Escape backslashes first, then double quotes, so the name stays a valid JSON
# string inside the manifest.
NAME_JSON=${NAME//\\/\\\\}
NAME_JSON=${NAME_JSON//\"/\\\"}

mkdir -p "${DIR}/blobs"

# Unquoted EOF on purpose: the ${...} placeholders below are expanded by the shell.
cat > "${DIR}/manifest.json" << EOF
{
  "schema_version": 2,
  "instance_id": "${INSTANCE_ID}",
  "instance_name": "${NAME_JSON}",
  "owner_identity": null,
  "hat_identifier": null,
  "description": null,
  "created_at": "${NOW}",
  "updated_at": "${NOW}",
  "metadata": {}
}
EOF

# -bail makes sqlite3 stop at the first failing statement instead of carrying
# on with the rest of the script.
sqlite3 -bail "${DIR}/index.db" < index-bootstrap.sql
sqlite3 -bail "${DIR}/memory.db" < memory-bootstrap.sql

# Quoted 'EOF': written verbatim, nothing to expand.
cat > "${DIR}/adapters.json" << 'EOF'
{
  "adapters": []
}
EOF

touch "${DIR}/ingress.log"

echo "Created ${DIR}/ with instance_id=${INSTANCE_ID}"

Verification

After bootstrapping, verify the container is valid:

# Check all required files exist
ls -la my-instance.omnidata/
# Expected: manifest.json, index.db, memory.db, blobs/, adapters.json, ingress.log

# Check manifest (json.tool pretty-prints valid JSON and fails with a parse
# error otherwise)
python3 -m json.tool my-instance.omnidata/manifest.json

# Check index.db tables
sqlite3 my-instance.omnidata/index.db ".tables"
# Expected: fts_chunks, omnidata_chunks, omnidata_kv, omnidata_queue, omnidata_resources
# The FTS5 shadow tables that back fts_chunks are listed as well:
# fts_chunks_config, fts_chunks_data, fts_chunks_docsize, fts_chunks_idx

# Check memory.db tables
sqlite3 my-instance.omnidata/memory.db ".tables"
# Expected: omnidata_collections, omnidata_edges, omnidata_memory, omnidata_tags

# Check PRAGMAs
sqlite3 my-instance.omnidata/index.db "PRAGMA journal_mode;"
# Expected: wal
# Only journal_mode is stored in the database file. foreign_keys and cache_size
# are per-connection settings, so a fresh connection reports its own defaults
# for them and they cannot be verified this way.

# Check adapters.json
python3 -m json.tool my-instance.omnidata/adapters.json