Bootstrapping a Container
Creating a new .omnidata container is a multi-step process: create the directory structure, write the manifest, bootstrap both SQLite databases, initialize the blob store, create the adapter registry, and start the ingress log.
Step 1: Create the directory
# One call creates both the container root and its blobs/ subdirectory (-p).
mkdir -p 'my-instance.omnidata/blobs'
Step 2: Write manifest.json
# Write the container manifest. The values below are examples: instance_id
# must be replaced with a freshly generated UUID v4, and owner_identity with
# the real owner's address.
cat > my-instance.omnidata/manifest.json << 'EOF'
{
"schema_version": 2,
"instance_id": "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d",
"instance_name": "my-instance",
"owner_identity": "owner@example.com",
"hat_identifier": "my-hat",
"description": "Knowledge container for my-hat role",
"created_at": "2026-04-01T12:00:00Z",
"updated_at": "2026-04-01T12:00:00Z",
"metadata": {}
}
EOF
The instance_id must be a UUID v4, generated fresh for each new instance. It is the permanent, immutable identity of this container.
Step 3: Bootstrap index.db
# Bootstrap index.db: resources, chunks, full-text index, work queue and
# key/value store. -bail makes sqlite3 stop at the first failing statement
# instead of carrying on and leaving a partially created schema.
sqlite3 -bail my-instance.omnidata/index.db << 'INDEXSQL'
-- journal_mode = WAL is recorded in the database file and survives reconnects.
PRAGMA journal_mode = WAL;
-- foreign_keys and cache_size affect this connection only; every connection
-- that later opens the database has to set them again.
PRAGMA foreign_keys = ON;
PRAGMA cache_size = -64000;  -- negative value = cache size in KiB (64000 KiB)

-- Primary keys are declared NOT NULL explicitly: in an ordinary (rowid)
-- SQLite table a TEXT PRIMARY KEY column otherwise accepts NULL.
CREATE TABLE IF NOT EXISTS omnidata_resources (
    id TEXT PRIMARY KEY NOT NULL,
    uri TEXT NOT NULL UNIQUE,
    adapter_name TEXT NOT NULL,
    content_type TEXT,
    title TEXT,
    content_hash TEXT,
    content_length INTEGER,
    resource_at TEXT,
    pipeline_state TEXT NOT NULL DEFAULT 'discovered',
    metadata TEXT DEFAULT '{}',
    created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    deleted_at TEXT
);

CREATE TABLE IF NOT EXISTS omnidata_chunks (
    id TEXT PRIMARY KEY NOT NULL,
    resource_id TEXT NOT NULL REFERENCES omnidata_resources(id),
    chunk_index INTEGER NOT NULL,
    content TEXT NOT NULL,
    embedding BLOB,
    embedding_model TEXT,
    metadata TEXT DEFAULT '{}',
    created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    deleted_at TEXT
);

-- External-content FTS5 index over omnidata_chunks.content, keyed by the
-- chunk table's implicit rowid. omnidata_chunks has no INTEGER PRIMARY KEY,
-- so VACUUM may renumber its rowids; after a VACUUM, resynchronise with
--   INSERT INTO fts_chunks(fts_chunks) VALUES ('rebuild');
CREATE VIRTUAL TABLE IF NOT EXISTS fts_chunks USING fts5(
    content,
    content=omnidata_chunks,
    content_rowid=rowid
);

CREATE TABLE IF NOT EXISTS omnidata_queue (
    id TEXT PRIMARY KEY NOT NULL,
    resource_id TEXT NOT NULL REFERENCES omnidata_resources(id),
    task_type TEXT NOT NULL,
    priority INTEGER NOT NULL DEFAULT 0,
    status TEXT NOT NULL DEFAULT 'pending',
    attempts INTEGER NOT NULL DEFAULT 0,
    max_attempts INTEGER NOT NULL DEFAULT 3,
    metadata TEXT DEFAULT '{}',
    created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    deleted_at TEXT
);

CREATE TABLE IF NOT EXISTS omnidata_kv (
    key TEXT PRIMARY KEY NOT NULL,
    value TEXT NOT NULL,
    created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))
);

-- Indexes for index.db
-- Indexes filtered with WHERE deleted_at IS NULL only cover rows that have
-- not been soft-deleted.
-- NOTE(review): uri is declared UNIQUE above, which already gives it an
-- implicit index; idx_resources_uri duplicates it. Kept so the set of index
-- names in the schema is unchanged.
CREATE INDEX IF NOT EXISTS idx_resources_uri ON omnidata_resources(uri);
CREATE INDEX IF NOT EXISTS idx_resources_pipeline ON omnidata_resources(pipeline_state) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_resources_content_hash ON omnidata_resources(content_hash);
CREATE INDEX IF NOT EXISTS idx_resources_adapter ON omnidata_resources(adapter_name) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_chunks_resource ON omnidata_chunks(resource_id, chunk_index);
CREATE INDEX IF NOT EXISTS idx_queue_status ON omnidata_queue(status, priority DESC) WHERE deleted_at IS NULL;

-- FTS5 sync triggers
-- An external-content FTS5 table is not updated automatically; these triggers
-- mirror inserts, deletes and content updates of omnidata_chunks into it.
-- Setting deleted_at fires none of them (fts_chunks_au only watches content),
-- so soft-deleted chunks remain in the FTS index.
-- NOTE(review): search queries presumably join back to omnidata_chunks and
-- filter on deleted_at - confirm against the runtime.
CREATE TRIGGER IF NOT EXISTS fts_chunks_ai AFTER INSERT ON omnidata_chunks BEGIN
    INSERT INTO fts_chunks(rowid, content) VALUES (new.rowid, new.content);
END;
CREATE TRIGGER IF NOT EXISTS fts_chunks_ad AFTER DELETE ON omnidata_chunks BEGIN
    INSERT INTO fts_chunks(fts_chunks, rowid, content) VALUES ('delete', old.rowid, old.content);
END;
-- An update is applied as "remove the old text, then index the new text".
CREATE TRIGGER IF NOT EXISTS fts_chunks_au AFTER UPDATE OF content ON omnidata_chunks BEGIN
    INSERT INTO fts_chunks(fts_chunks, rowid, content) VALUES ('delete', old.rowid, old.content);
    INSERT INTO fts_chunks(rowid, content) VALUES (new.rowid, new.content);
END;
INDEXSQL
Step 4: Bootstrap memory.db
# Bootstrap memory.db: collections, edges, tags and memory triples.
# -bail makes sqlite3 stop at the first failing statement instead of carrying
# on and leaving a partially created schema.
sqlite3 -bail my-instance.omnidata/memory.db << 'MEMORYSQL'
-- journal_mode = WAL is recorded in the database file and survives reconnects.
PRAGMA journal_mode = WAL;
-- foreign_keys and cache_size affect this connection only; every connection
-- that later opens the database has to set them again.
PRAGMA foreign_keys = ON;
PRAGMA cache_size = -64000;  -- negative value = cache size in KiB (64000 KiB)

-- Primary keys are declared NOT NULL explicitly: in an ordinary (rowid)
-- SQLite table a TEXT PRIMARY KEY column otherwise accepts NULL.

-- parent_id is a self-reference; it is nullable, so a collection may have no
-- parent.
CREATE TABLE IF NOT EXISTS omnidata_collections (
    id TEXT PRIMARY KEY NOT NULL,
    name TEXT NOT NULL,
    parent_id TEXT REFERENCES omnidata_collections(id),
    description TEXT,
    metadata TEXT DEFAULT '{}',
    created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    deleted_at TEXT
);

-- source_id / target_id carry no REFERENCES clause, so SQLite does not check
-- them. NOTE(review): presumably they may point at rows in index.db, which a
-- foreign key cannot span - confirm.
CREATE TABLE IF NOT EXISTS omnidata_edges (
    id TEXT PRIMARY KEY NOT NULL,
    source_id TEXT NOT NULL,
    target_id TEXT NOT NULL,
    edge_type TEXT NOT NULL,
    metadata TEXT DEFAULT '{}',
    created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    deleted_at TEXT
);

-- resource_id is likewise unchecked by SQLite (no REFERENCES clause).
CREATE TABLE IF NOT EXISTS omnidata_tags (
    id TEXT PRIMARY KEY NOT NULL,
    resource_id TEXT NOT NULL,
    tag TEXT NOT NULL,
    created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    deleted_at TEXT
);

CREATE TABLE IF NOT EXISTS omnidata_memory (
    id TEXT PRIMARY KEY NOT NULL,
    memory_type TEXT NOT NULL,
    subject TEXT,
    predicate TEXT,
    object TEXT,
    confidence REAL DEFAULT 1.0,
    source_id TEXT,
    metadata TEXT DEFAULT '{}',
    created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
    deleted_at TEXT
);

-- Indexes for memory.db
-- Every index is filtered with WHERE deleted_at IS NULL, so each one only
-- covers rows that have not been soft-deleted.
CREATE INDEX IF NOT EXISTS idx_collections_parent ON omnidata_collections(parent_id) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_edges_source ON omnidata_edges(source_id, edge_type) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_edges_target ON omnidata_edges(target_id, edge_type) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_tags_resource ON omnidata_tags(resource_id) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_tags_tag ON omnidata_tags(tag) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_memory_type ON omnidata_memory(memory_type) WHERE deleted_at IS NULL;
CREATE INDEX IF NOT EXISTS idx_memory_subject ON omnidata_memory(subject) WHERE deleted_at IS NULL;
MEMORYSQL
Step 5: Create adapters.json
# Start with an empty adapter registry: one printf argument per output line.
printf '%s\n' '{' '"adapters": []' '}' > my-instance.omnidata/adapters.json
An empty adapter array is valid. Adapters are registered when the runtime configures them for the first time.
Step 6: Initialize ingress.log
# Create the ingress log as an empty file; an existing file keeps its contents.
touch -- 'my-instance.omnidata/ingress.log'
An empty ingress log is valid. Lines are appended as resources are ingested.
Complete script
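Here is the full bootstrap as a single shell script. It reads the schema from two files in the current working directory, index-bootstrap.sql and memory-bootstrap.sql, which contain the SQL statements from Step 3 and Step 4 respectively (only the lines between the heredoc markers, not the sqlite3 command itself):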
#!/bin/bash
# bootstrap.sh - create a new, empty <instance-name>.omnidata container.
#
# Usage: bootstrap.sh <instance-name>
#
# Reads index-bootstrap.sql and memory-bootstrap.sql from the current working
# directory and feeds them to sqlite3 to create index.db and memory.db.
#
# Exits non-zero without creating anything when sqlite3 or either SQL file is
# missing, when the name contains control characters, or when the container
# directory already exists (re-running would otherwise overwrite manifest.json
# and replace the container's instance_id with a new one).
set -euo pipefail

NAME="${1:?Usage: bootstrap.sh <instance-name>}"
DIR="${NAME}.omnidata"

# Print an error to stderr and abort.
die() {
    echo "bootstrap.sh: $*" >&2
    exit 1
}

# Preflight: detect every predictable failure before touching the filesystem,
# so a failed run does not leave a half-built container behind.
command -v sqlite3 > /dev/null 2>&1 || die "sqlite3 not found in PATH"
for sql_file in index-bootstrap.sql memory-bootstrap.sql; do
    [[ -r "${sql_file}" ]] || die "${sql_file} not found in ${PWD}"
done
[[ ! -e "${DIR}" ]] || die "${DIR} already exists; refusing to overwrite it"
if [[ "${NAME}" == *[[:cntrl:]]* ]]; then
    die "instance name must not contain control characters"
fi

# Escape backslashes and double quotes so the name is a valid JSON string
# when it is interpolated into the manifest below.
JSON_NAME=${NAME//\\/\\\\}
JSON_NAME=${JSON_NAME//\"/\\\"}

# Lower-cased because some uuidgen implementations print upper-case hex.
# NOTE(review): the manifest calls for a UUID v4; which variant plain
# `uuidgen` emits depends on the platform - confirm on the target system.
INSTANCE_ID=$(uuidgen | tr '[:upper:]' '[:lower:]')
NOW=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

mkdir -p "${DIR}/blobs"
# From here on a failure leaves a partial directory; tell the user, because
# the existence check above will refuse to run again until it is removed.
trap 'echo "bootstrap.sh: failed; remove the incomplete ${DIR} before retrying" >&2' ERR

# Unquoted EOF: the shell expands the variables inside the manifest.
cat > "${DIR}/manifest.json" << EOF
{
"schema_version": 2,
"instance_id": "${INSTANCE_ID}",
"instance_name": "${JSON_NAME}",
"owner_identity": null,
"hat_identifier": null,
"description": null,
"created_at": "${NOW}",
"updated_at": "${NOW}",
"metadata": {}
}
EOF

# -bail stops at the first failing SQL statement instead of continuing.
sqlite3 -bail "${DIR}/index.db" < index-bootstrap.sql
sqlite3 -bail "${DIR}/memory.db" < memory-bootstrap.sql

# Quoted 'EOF': written literally, nothing to expand.
cat > "${DIR}/adapters.json" << 'EOF'
{
"adapters": []
}
EOF

touch "${DIR}/ingress.log"
echo "Created ${DIR}/ with instance_id=${INSTANCE_ID}"
Verification
After bootstrapping, verify the container is valid:
# Check all required files exist
ls -la my-instance.omnidata/
# Expected: manifest.json, index.db, memory.db, blobs/, adapters.json, ingress.log
# Check manifest (json.tool exits non-zero if the file is not valid JSON)
python3 -m json.tool my-instance.omnidata/manifest.json
# Check index.db tables
sqlite3 my-instance.omnidata/index.db ".tables"
# Expected: fts_chunks, omnidata_chunks, omnidata_kv, omnidata_queue, omnidata_resources
# plus the shadow tables FTS5 creates for fts_chunks:
# fts_chunks_config, fts_chunks_data, fts_chunks_docsize, fts_chunks_idx
# Check memory.db tables
sqlite3 my-instance.omnidata/memory.db ".tables"
# Expected: omnidata_collections, omnidata_edges, omnidata_memory, omnidata_tags
# Check the journal mode. It is the only bootstrap PRAGMA stored in the
# database file; foreign_keys and cache_size are per-connection settings and
# cannot be verified from a new connection.
sqlite3 my-instance.omnidata/index.db "PRAGMA journal_mode;"
# Expected: wal
# Check adapters.json
python3 -m json.tool my-instance.omnidata/adapters.json