Intelligence, Distilled.

Autonomous AI agents, powered by Blackwell-class GPU systems. Run 235B-parameter models locally at production speed. Multi-engine orchestration across local inference, cloud APIs, and CLI agents — unified by a single framework.

RTX PRO 6000 Blackwell · Agentic AI · Local-First Inference · Open Source

About PureTensor

We build autonomous AI agents that run on your hardware, not someone else's cloud. PureTensor develops agentic systems powered by large language models running locally on NVIDIA Blackwell GPUs — from email automation and voice intelligence to security testing and strategic analysis. Our agents operate 24/7 in production, with human-in-the-loop controls and zero cloud dependency for sensitive workloads.

Capabilities

PureTensor builds AI agents that reason, act, and learn autonomously. Cloud-capable yet self-contained by design, our agents run large language models locally with full tool calling, voice transcription, and multi-modal perception.

  • Multi-engine agent orchestration — 7 LLM backends, swappable with one environment variable.
  • Local inference at scale — 235B-parameter models on NVIDIA Blackwell at 70 tokens/sec.
  • Multi-modal pipelines — voice (STT/TTS with cloning), vision (OCR), and text generation.
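Swapping backends with a single environment variable can be sketched as a small registry lookup. This is an illustrative sketch only: the `PURECLAW_ENGINE` variable name, the backend names, and the client factories are assumptions, not PureClaw's actual configuration surface.

```python
import os

# Hypothetical registry mapping backend names to client factories.
# In a real framework these would construct local, cloud-API, or
# CLI-agent clients behind a common interface.
BACKENDS = {
    "local": lambda: "LocalLlamaClient",
    "openai": lambda: "OpenAIClient",
    "anthropic": lambda: "AnthropicClient",
    "cli": lambda: "CLIAgentClient",
}

def select_engine() -> str:
    """Pick an inference backend from a single environment variable."""
    name = os.environ.get("PURECLAW_ENGINE", "local")
    if name not in BACKENDS:
        raise ValueError(f"unknown engine {name!r}; choose from {sorted(BACKENDS)}")
    return BACKENDS[name]()

os.environ["PURECLAW_ENGINE"] = "openai"
engine = select_engine()  # swaps the backend with no code changes
```

Because every backend sits behind the same factory interface, switching from local inference to a cloud API is a deployment decision, not a code change.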

Agent Products

  • PureClaw — open-source multi-engine agentic framework (924+ tests, production 24/7).
  • Voice KB — speech-to-knowledge pipeline (1,200+ topics, 1,000+ entities indexed).
  • Kalima — multilingual code-switching dictation for Arabic speakers.

Tensor // Core

The Blackwell-class GPU system powering PureTensor's autonomous agents. Multiple RTX PRO 6000 Blackwell Workstation Edition GPUs with hundreds of gigabytes of unified VRAM run 235B-parameter models at production speed — enabling agentic reasoning, tool calling, voice synthesis, and multi-modal perception on owned hardware.

  • Blackwell Architecture — multiple RTX PRO 6000 workstation GPUs powering local AI agents
  • Production Inference — 235B-parameter MoE models at 70 tokens/sec, 100% GPU-resident
  • Multi-Model Serving — LLM, Whisper STT, XTTS voice cloning, and OCR running concurrently
  • Portable Artifacts — agents developed locally, deployable elastically to the cloud
  • Hybrid Integration — aligned with Ark // Nexus for unified compute + data fabric

Infrastructure at a Glance

  • GPU Memory — hundreds of gigabytes of unified GPU memory across multiple RTX PRO 6000 Blackwell workstations
  • System Memory — terabytes of ECC system memory
  • Storage — petascale erasure-coded distributed storage
  • Compute Fabric — 200GbE linking inference, orchestration, and storage tiers
  • Spine Switch — 400G with sub-microsecond switching latency

Owned infrastructure. Full operational control. Complete data sovereignty.

Ark // Nexus

The decentralized data plane bridging cloud object stores with PureTensor's Blackwell-class reference lab and edge inference. It enforces scheduled replication windows, maintains versioned datasets, and provides low-latency inference paths for sensitive workloads — the backbone of PureTensor's distributed intelligence.

  • Cloud object store alignment (S3/Blob/GCS).
  • Scheduled replication windows, versioned datasets.
  • Low-latency inference paths for sensitive workloads.
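A scheduled replication window reduces to a time-of-day gate that bulk transfers must pass before running. The sketch below assumes a nightly 01:00–05:00 UTC window; that schedule, like the function name, is an illustration, not Ark // Nexus's actual policy.

```python
from datetime import datetime, time, timezone

# Assumed nightly replication window (UTC); purely illustrative.
WINDOW_START = time(1, 0)
WINDOW_END = time(5, 0)

def in_replication_window(now: datetime) -> bool:
    """True if `now` falls inside the scheduled replication window.

    Bulk cloud-to-lab transfers would be queued until this returns True,
    keeping daytime inference paths free of replication traffic.
    """
    t = now.astimezone(timezone.utc).time()
    return WINDOW_START <= t < WINDOW_END

in_replication_window(datetime(2025, 1, 1, 2, 30, tzinfo=timezone.utc))  # True
```

Gating replication this way keeps data movement predictable: versioned datasets sync during the window, while latency-sensitive inference paths stay uncontended the rest of the day.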

Deployment

  • Elastic Cloud — scale-out training, managed MLOps, global delivery.
  • Hybrid On-Prem — Blackwell inference on sensitive paths, cloud for scale.
  • Edge Immediate — distilled, quantized models where milliseconds matter.

Security & Governance

Security is not an afterthought but a design principle. Per-developer isolation, lineage-tracked datasets, and strict minimization of data movement define PureTensor's operational standards.

  • Per-dev isolation (fixed VRAM slices) and least-privilege by default.
  • Dataset versioning, lineage, and retention policies.
  • Minimize data movement: sensitive loops local; derived artifacts promoted to cloud.
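Per-developer isolation with fixed VRAM slices can be pictured as a simple budget ledger per GPU: each developer holds a fixed slice, and requests beyond the remaining capacity are refused. The class, names, and the 96 GiB capacity are assumptions for the sketch; real enforcement would sit at the scheduler or driver level (e.g. MPS- or MIG-style memory limits), not in application code.

```python
class VramSlicer:
    """Illustrative fixed-slice VRAM ledger for one GPU (not real enforcement)."""

    def __init__(self, total_gib: int):
        self.total = total_gib
        self.slices: dict[str, int] = {}

    def grant(self, dev: str, gib: int) -> None:
        """Reserve a fixed slice for a developer, or fail if over budget."""
        used = sum(self.slices.values())
        if used + gib > self.total:
            raise MemoryError(
                f"cannot grant {gib} GiB to {dev}: only {self.total - used} GiB free"
            )
        self.slices[dev] = self.slices.get(dev, 0) + gib

slicer = VramSlicer(total_gib=96)  # assumed capacity of one workstation card
slicer.grant("alice", 48)
slicer.grant("bob", 32)
```

The point of fixed slices over best-effort sharing is least-privilege by default: a runaway job exhausts its own budget, never a neighbor's.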

The Team

H. Helgas — Founder & CTO

H. Helgas is the founder and CTO of PureTensor and the principal architect of Tensor // Core, the flagship Blackwell-class GPU system anchoring PureTensor's compute fleet, engineered for next-generation training, inference, and HPC workloads at scale. He also designed Ark // Nexus, the decentralized data plane that unifies compute, storage, and recovery into a single operational fabric. With a background in distributed kernel engineering and CUDA-level performance tuning, he focuses on low-latency architectures where bare-metal efficiency converges with cloud-native elasticity. His experience spans finance and technology, including algorithmic trading systems where latency discipline and deterministic execution were non-negotiable — principles now embedded in PureTensor's infrastructure design.

Development Team

PureTensor operates with a distributed team model, drawing on engineering talent across time zones. Our focus: MLOps, distributed systems, and GPU-accelerated workloads. We scale capacity to match engagement requirements — lean on infrastructure, rigorous on delivery.

Contact

Send a short problem statement. If there's a fit, we schedule a technical call.

Or email us directly: ops@puretensor.ai