*/
    public function chunk(string $text, int $targetTokens = 100, int $overlapTokens = 20): array
    {
        // Splits $text into a list of strings of at most $targetTokens
        // space-joined words each, with consecutive chunks sharing up to
        // $overlapTokens words. Returns [] when there is nothing to emit.

        // A window must hold at least one word. array_slice() interprets a
        // negative length as "stop N elements from the end", so a non-positive
        // size can never describe a valid window; bail out before it gets there.
        if ($targetTokens < 1) {
            return [];
        }

        // Collapse every whitespace run to a single space. preg_replace()
        // returns null on a regex engine error, in which case the raw text is
        // kept as-is.
        $text = trim(preg_replace('/\s+/', ' ', $text) ?? $text);
        if ($text === '') {
            return [];
        }

        // Approximate tokenization by words for deterministic local chunking.
        $words = preg_split('/\s+/', $text) ?: [];
        $count = count($words);
        if ($count === 0) {
            return [];
        }

        // A negative overlap would push the step past the window size and skip
        // words between chunks, so treat it as "no overlap". An overlap that is
        // equal to or larger than the window still advances one word at a time.
        $overlap = max(0, $overlapTokens);
        $step = max(1, $targetTokens - $overlap);

        $chunks = [];
        for ($start = 0; $start < $count; $start += $step) {
            $chunks[] = implode(' ', array_slice($words, $start, $targetTokens));

            // Once the current window reaches the last word, any further window
            // would only repeat words already emitted.
            if ($start + $targetTokens >= $count) {
                break;
            }
        }

        return $chunks;
    }
}