114 lines
4.4 KiB
PHP
114 lines
4.4 KiB
PHP
<?php
|
|
|
|
namespace App\Services;
|
|
|
|
use App\Models\AIDecision;
|
|
use App\Models\Article;
|
|
use App\Models\Ticket;
|
|
use App\Repositories\Contracts\ArticleRepositoryInterface;
|
|
|
|
/**
 * Selects the knowledge-base article that best matches a ticket.
 *
 * Flow of findBestArticle(): obtain an embedding for the ticket text, ask the
 * article repository for similar articles, de-duplicate and re-rank those
 * candidates, hand the shortlist to the classifier, and store the outcome as
 * an AIDecision record.
 */
class SemanticSearchService
{
    /** How many similar articles are requested from the repository. */
    private const RAW_CANDIDATE_LIMIT = 12;

    /** How many candidates remain after de-duplication and re-ranking. */
    private const CLASSIFIER_CANDIDATE_LIMIT = 5;

    /** Score bonus added per procedural keyword found in a candidate (how-to queries only). */
    private const PROCEDURAL_KEYWORD_BOOST = 0.03;

    /** Substrings that mark a (lowercased) query as a how-to question. */
    private const HOW_TO_QUERY_MARKERS = ['hoe', 'instel', 'stap'];

    /** Substrings that mark a (lowercased) candidate text as procedural. */
    private const PROCEDURAL_KEYWORDS = ['hoe', 'stap', 'instellen', 'aanmaken', 'wijzigen', 'klik', 'menu', 'beheren', 'dns'];

    public function __construct(
        private readonly EmbeddingService $embeddingService,
        private readonly ArticleRepositoryInterface $articleRepository,
        private readonly AIClassifierService $classifierService,
    ) {}

    /**
     * Finds the best article for the given ticket and records the decision.
     *
     * Side effects: the ticket is saved when its embedding had to be (re)generated,
     * and one AIDecision row is created on every call.
     *
     * Returned keys:
     *  - best_article: the Article chosen by the classifier, or null
     *  - confidence, explanation: taken from the classification result
     *  - top_3_candidates / top_5_candidates: candidate arrays (via toArray()) in ranked order
     *  - retrieval_meta.raw_candidates_count: number of candidates returned by the repository
     *  - retrieval_meta.deduped_candidates_count: number of candidates left after
     *    de-duplication AND the shortlist limit (so never more than the limit)
     *  - requested_tool_call, classifier_raw_response: taken from the classification result
     *
     * @return array<string, mixed>
     */
    public function findBestArticle(Ticket $ticket): array
    {
        // `?:` falls back to the raw message for any falsy normalized message (null or empty string).
        $queryText = $ticket->normalized_message ?: $ticket->message;
        $language = (string) ($ticket->redaction_report['language'] ?? 'nl');
        $embeddingContext = $this->embeddingService->context();

        // A stored embedding is only reused when it came from the same provider instance and model.
        $hasCurrentEmbedding = $ticket->embedding !== null
            && $ticket->embedding_provider_instance_id === $embeddingContext['provider_instance_id']
            && $ticket->embedding_model === $embeddingContext['embedding_model'];

        if ($hasCurrentEmbedding) {
            $embedding = $ticket->embedding;
        } else {
            $embedding = $this->embeddingService->embed($queryText);

            $ticket->embedding = $embedding;
            $ticket->embedding_provider_instance_id = $embeddingContext['provider_instance_id'];
            $ticket->embedding_model = $embeddingContext['embedding_model'];
            $ticket->embedded_at = now();
            $ticket->save();
        }

        $rawCandidates = $this->articleRepository->findSimilarByEmbedding(
            $embedding,
            self::RAW_CANDIDATE_LIMIT,
            $embeddingContext,
        );
        $candidates = $this->prepareCandidates($queryText, $rawCandidates, self::CLASSIFIER_CANDIDATE_LIMIT);
        $classification = $this->classifierService->rank($queryText, $candidates, $language);

        $bestArticle = $classification->articleId ? Article::find($classification->articleId) : null;

        AIDecision::query()->create([
            'ticket_id' => $ticket->id,
            'article_id' => $bestArticle?->id,
            'confidence' => $classification->confidence,
            'explanation' => $classification->explanation,
            'raw_response' => $classification->rawResponse,
        ]);

        // Convert each candidate once; the top-3 list is a prefix of the full list.
        $candidatePayloads = array_values(array_map(
            static fn ($candidate) => $candidate->toArray(),
            $candidates,
        ));

        return [
            'best_article' => $bestArticle,
            'confidence' => $classification->confidence,
            'explanation' => $classification->explanation,
            'top_3_candidates' => array_slice($candidatePayloads, 0, 3),
            'top_5_candidates' => $candidatePayloads,
            'retrieval_meta' => [
                'raw_candidates_count' => count($rawCandidates),
                'deduped_candidates_count' => count($candidates),
            ],
            'requested_tool_call' => $classification->toolCall,
            'classifier_raw_response' => $classification->rawResponse,
        ];
    }

    /**
     * De-duplicates candidates, re-ranks them by score (highest first) and keeps the first $limit.
     *
     * Candidates are treated as duplicates when they share a source article id, or failing
     * that a source URL, or failing that an article id; the first occurrence is kept.
     *
     * @param string $queryText  Ticket text; used only to detect how-to style questions.
     * @param array  $candidates Objects exposing sourceArticleId, sourceUrl, articleId, title, content and distance.
     * @param int    $limit      Maximum number of candidates to return.
     *
     * @return array Ranked, re-indexed list of at most $limit candidates.
     */
    private function prepareCandidates(string $queryText, array $candidates, int $limit): array
    {
        $uniqueByKey = [];

        foreach ($candidates as $candidate) {
            if ($candidate->sourceArticleId) {
                $dedupeKey = 'source_id:'.$candidate->sourceArticleId;
            } elseif ($candidate->sourceUrl) {
                $dedupeKey = 'source_url:'.$candidate->sourceUrl;
            } else {
                $dedupeKey = 'article:'.$candidate->articleId;
            }

            if (! isset($uniqueByKey[$dedupeKey])) {
                $uniqueByKey[$dedupeKey] = $candidate;
            }
        }

        $unique = array_values($uniqueByKey);

        // With fewer than two candidates there is nothing to rank, so no scoring is needed.
        if (count($unique) < 2) {
            return array_slice($unique, 0, $limit);
        }

        $query = mb_strtolower($queryText);
        $isHowTo = false;
        foreach (self::HOW_TO_QUERY_MARKERS as $marker) {
            if (str_contains($query, $marker)) {
                $isHowTo = true;
                break;
            }
        }

        // Score every candidate exactly once instead of on each comparison inside the sort.
        $scored = [];
        foreach ($unique as $candidate) {
            $scored[] = [
                'score' => $this->candidateScore(
                    $candidate->title.' '.$candidate->content,
                    $candidate->distance,
                    $isHowTo,
                ),
                'candidate' => $candidate,
            ];
        }

        // usort() is stable on PHP >= 8.0, so equally scored candidates keep their incoming order.
        usort($scored, static fn (array $a, array $b): int => $b['score'] <=> $a['score']);

        return array_slice(array_column($scored, 'candidate'), 0, $limit);
    }

    /**
     * Scores a candidate: 1.0 minus its distance, plus a small bonus per procedural
     * keyword found in its text when the query looks like a how-to question.
     *
     * @param string $text     Candidate title and content.
     * @param float  $distance Candidate distance; a smaller distance yields a higher score.
     * @param bool   $isHowTo  Whether the keyword bonus applies.
     */
    private function candidateScore(string $text, float $distance, bool $isHowTo): float
    {
        $score = 1.0 - $distance;

        if (! $isHowTo) {
            return $score;
        }

        $haystack = mb_strtolower($text);

        foreach (self::PROCEDURAL_KEYWORDS as $keyword) {
            if (str_contains($haystack, $keyword)) {
                $score += self::PROCEDURAL_KEYWORD_BOOST;
            }
        }

        return $score;
    }
}
|