Files
TicketAssistent/app/Services/SemanticSearchService.php

114 lines
4.4 KiB
PHP

<?php
namespace App\Services;
use App\Models\AIDecision;
use App\Models\Article;
use App\Models\Ticket;
use App\Repositories\Contracts\ArticleRepositoryInterface;
/**
 * Picks the knowledge-base article that best answers a ticket.
 *
 * Pipeline: obtain an embedding for the ticket text, fetch similar articles
 * from the repository, de-duplicate and re-order them, let the classifier
 * rank the shortlist, and store the outcome as an AIDecision record.
 */
class SemanticSearchService
{
    /** Number of similar articles requested from the repository. */
    private const RETRIEVAL_LIMIT = 12;

    /** Size of the shortlist that is handed to the classifier. */
    private const SHORTLIST_LIMIT = 5;

    /** Size of the shorter candidate preview included in the result. */
    private const PREVIEW_LIMIT = 3;

    /** Score bonus per procedural keyword found in a candidate (how-to queries only). */
    private const PROCEDURAL_KEYWORD_BOOST = 0.03;

    /** Substrings that mark a query as a how-to question (Dutch: "how", "configure", "step"). */
    private const HOW_TO_QUERY_MARKERS = ['hoe', 'instel', 'stap'];

    /** Substrings that suggest a candidate contains step-by-step instructions. */
    private const PROCEDURAL_KEYWORDS = [
        'hoe',
        'stap',
        'instellen',
        'aanmaken',
        'wijzigen',
        'klik',
        'menu',
        'beheren',
        'dns',
    ];

    public function __construct(
        private readonly EmbeddingService $embeddingService,
        private readonly ArticleRepositoryInterface $articleRepository,
        private readonly AIClassifierService $classifierService,
    ) {}

    /**
     * Finds the best matching article for a ticket and records the decision.
     *
     * Side effects: when the ticket has no embedding for the current
     * provider instance / model, a new one is computed and saved on the
     * ticket; an AIDecision row is always created.
     *
     * @return array{
     *     best_article: mixed,
     *     confidence: mixed,
     *     explanation: mixed,
     *     top_3_candidates: list<mixed>,
     *     top_5_candidates: list<mixed>,
     *     retrieval_meta: array{raw_candidates_count: int, deduped_candidates_count: int},
     *     requested_tool_call: mixed,
     *     classifier_raw_response: mixed
     * }
     */
    public function findBestArticle(Ticket $ticket): array
    {
        // Prefer the normalized text; fall back to the raw message when it is empty.
        $queryText = $ticket->normalized_message ?: $ticket->message;
        $language = (string) ($ticket->redaction_report['language'] ?? 'nl');

        $embeddingContext = $this->embeddingService->context();

        // A stored embedding is only reusable when it was produced by the
        // same provider instance and the same embedding model.
        $ticketEmbeddingIsCurrent = $ticket->embedding !== null
            && $ticket->embedding_provider_instance_id === $embeddingContext['provider_instance_id']
            && $ticket->embedding_model === $embeddingContext['embedding_model'];

        if ($ticketEmbeddingIsCurrent) {
            $embedding = $ticket->embedding;
        } else {
            $embedding = $this->embeddingService->embed($queryText);

            $ticket->embedding = $embedding;
            $ticket->embedding_provider_instance_id = $embeddingContext['provider_instance_id'];
            $ticket->embedding_model = $embeddingContext['embedding_model'];
            $ticket->embedded_at = now();
            $ticket->save();
        }

        $rawCandidates = $this->articleRepository->findSimilarByEmbedding(
            $embedding,
            self::RETRIEVAL_LIMIT,
            $embeddingContext,
        );
        $candidates = $this->prepareCandidates($queryText, $rawCandidates, self::SHORTLIST_LIMIT);

        $classification = $this->classifierService->rank($queryText, $candidates, $language);
        $bestArticle = $classification->articleId ? Article::find($classification->articleId) : null;

        AIDecision::query()->create([
            'ticket_id' => $ticket->id,
            'article_id' => $bestArticle?->id,
            'confidence' => $classification->confidence,
            'explanation' => $classification->explanation,
            'raw_response' => $classification->rawResponse,
        ]);

        // Serialize the shortlist once; the shorter preview is a prefix of it.
        $shortlist = array_map(static fn ($candidate) => $candidate->toArray(), $candidates);

        return [
            'best_article' => $bestArticle,
            'confidence' => $classification->confidence,
            'explanation' => $classification->explanation,
            'top_3_candidates' => array_slice($shortlist, 0, self::PREVIEW_LIMIT),
            'top_5_candidates' => $shortlist,
            'retrieval_meta' => [
                'raw_candidates_count' => count($rawCandidates),
                'deduped_candidates_count' => count($candidates),
            ],
            'requested_tool_call' => $classification->toolCall,
            'classifier_raw_response' => $classification->rawResponse,
        ];
    }

    /**
     * De-duplicates candidates and returns the $limit best-scoring ones.
     *
     * Candidates are considered duplicates when they share a source article
     * id, or (without one) a source URL, or (without either) an article id.
     * The first occurrence wins. Candidates with equal scores keep their
     * incoming relative order.
     *
     * @param array<int, object> $candidates objects exposing articleId, sourceArticleId,
     *                                       sourceUrl, title, content and distance
     *
     * @return list<object>
     */
    private function prepareCandidates(string $queryText, array $candidates, int $limit): array
    {
        $seen = [];
        $unique = [];

        foreach ($candidates as $candidate) {
            $dedupeKey = $candidate->sourceArticleId
                ? 'source_id:'.$candidate->sourceArticleId
                : ($candidate->sourceUrl ? 'source_url:'.$candidate->sourceUrl : 'article:'.$candidate->articleId);

            if (isset($seen[$dedupeKey])) {
                continue;
            }

            $seen[$dedupeKey] = true;
            $unique[] = $candidate;
        }

        // With fewer than two candidates there is nothing to order.
        if (count($unique) < 2) {
            return array_slice($unique, 0, $limit);
        }

        $isHowTo = $this->isHowToQuery($queryText);

        // Score every candidate exactly once. Scoring lowercases the whole
        // article text, so doing it inside the sort comparator would repeat
        // that work for every comparison.
        $scored = [];
        foreach ($unique as $candidate) {
            $scored[] = [
                'candidate' => $candidate,
                'score' => $this->candidateScore(
                    $candidate->title.' '.$candidate->content,
                    $candidate->distance,
                    $isHowTo,
                ),
            ];
        }

        // Highest score first.
        usort($scored, static fn (array $left, array $right): int => $right['score'] <=> $left['score']);

        return array_slice(array_column($scored, 'candidate'), 0, $limit);
    }

    /**
     * Tells whether the query contains one of the how-to markers
     * (case-insensitive substring match).
     */
    private function isHowToQuery(string $queryText): bool
    {
        $query = mb_strtolower($queryText);

        foreach (self::HOW_TO_QUERY_MARKERS as $marker) {
            if (str_contains($query, $marker)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Scores a candidate: higher is better.
     *
     * The base score is 1 - $distance. For how-to queries each procedural
     * keyword present in $text (case-insensitive substring match) adds a
     * fixed bonus.
     */
    private function candidateScore(string $text, float $distance, bool $isHowTo): float
    {
        $score = 1.0 - $distance;

        if (! $isHowTo) {
            return $score;
        }

        $haystack = mb_strtolower($text);

        foreach (self::PROCEDURAL_KEYWORDS as $keyword) {
            if (str_contains($haystack, $keyword)) {
                $score += self::PROCEDURAL_KEYWORD_BOOST;
            }
        }

        return $score;
    }
}