Add helpdesk import progress, category model, article metadata columns, and ticket pagination controls
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
namespace App\Services;
|
||||
|
||||
use App\Models\Article;
|
||||
use App\Models\Category;
|
||||
use Illuminate\Support\Facades\Http;
|
||||
use Illuminate\Support\Str;
|
||||
|
||||
@@ -10,54 +11,73 @@ class HelpdeskImportService
|
||||
{
|
||||
private const DEFAULT_BASE_URL = 'https://www.internettoday.nl/helpdesk';
|
||||
|
||||
public function import(?string $baseUrl = null, bool $dryRun = false, ?int $limit = null): array
|
||||
public function import(?string $baseUrl = null, bool $dryRun = false, ?int $limit = null, ?callable $progress = null): array
|
||||
{
|
||||
$baseUrl = rtrim($baseUrl ?: self::DEFAULT_BASE_URL, '/');
|
||||
|
||||
$rootHtml = $this->fetch($baseUrl);
|
||||
$categories = $this->extractCategories($rootHtml);
|
||||
|
||||
$sectionUrls = $this->buildSectionUrls($baseUrl, $categories);
|
||||
$articleUrls = $this->collectArticleUrls($baseUrl, $rootHtml, $sectionUrls);
|
||||
$categoryMap = $this->syncCategories($categories, $dryRun);
|
||||
$sections = $this->buildSections($baseUrl, $categories);
|
||||
|
||||
$articleUrlMap = $this->collectArticleUrls($baseUrl, $rootHtml, $sections);
|
||||
if ($limit !== null && $limit > 0) {
|
||||
$articleUrls = array_slice($articleUrls, 0, $limit);
|
||||
$articleUrlMap = array_slice($articleUrlMap, 0, $limit, true);
|
||||
}
|
||||
|
||||
$total = count($articleUrlMap);
|
||||
$imported = 0;
|
||||
$updated = 0;
|
||||
$skipped = 0;
|
||||
$processed = 0;
|
||||
|
||||
foreach ($articleUrls as $articleUrl) {
|
||||
foreach ($articleUrlMap as $articleUrl => $meta) {
|
||||
$processed++;
|
||||
$parsed = $this->parseArticlePage($articleUrl);
|
||||
if ($parsed === null) {
|
||||
$skipped++;
|
||||
$progress && $progress($processed, $total, $articleUrl, 'skipped');
|
||||
continue;
|
||||
}
|
||||
|
||||
if ($dryRun) {
|
||||
$imported++;
|
||||
$progress && $progress($processed, $total, $articleUrl, 'dry-run');
|
||||
continue;
|
||||
}
|
||||
|
||||
[$title, $content] = $parsed;
|
||||
$result = Article::withoutEvents(function () use ($title, $content) {
|
||||
[$title, $content, $sourceArticleId] = $parsed;
|
||||
|
||||
$categoryId = $this->resolveCategoryId($meta['category_external_id'] ?? null, $categoryMap);
|
||||
$subcategoryId = $this->resolveCategoryId($meta['subcategory_external_id'] ?? null, $categoryMap);
|
||||
|
||||
$result = Article::withoutEvents(function () use ($title, $content, $articleUrl, $sourceArticleId, $categoryId, $subcategoryId) {
|
||||
return Article::query()->updateOrCreate(
|
||||
['title' => $title],
|
||||
['content' => $content]
|
||||
['source' => 'internettoday_helpdesk', 'source_article_id' => $sourceArticleId],
|
||||
[
|
||||
'title' => $title,
|
||||
'content' => $content,
|
||||
'source_url' => $articleUrl,
|
||||
'category_id' => $categoryId,
|
||||
'subcategory_id' => $subcategoryId,
|
||||
]
|
||||
);
|
||||
});
|
||||
|
||||
if ($result->wasRecentlyCreated) {
|
||||
$imported++;
|
||||
$progress && $progress($processed, $total, $articleUrl, 'imported');
|
||||
} else {
|
||||
$updated++;
|
||||
$progress && $progress($processed, $total, $articleUrl, 'updated');
|
||||
}
|
||||
}
|
||||
|
||||
return [
|
||||
'categories' => count($categories),
|
||||
'sections' => count($sectionUrls),
|
||||
'article_urls' => count($articleUrls),
|
||||
'sections' => count($sections),
|
||||
'article_urls' => $total,
|
||||
'imported' => $imported,
|
||||
'updated' => $updated,
|
||||
'skipped' => $skipped,
|
||||
@@ -67,11 +87,7 @@ class HelpdeskImportService
|
||||
|
||||
/**
 * Download a page and return the raw response body.
 *
 * The request uses a 30 second timeout and is attempted up to two times
 * with a 300 ms pause between attempts.
 *
 * @param string $url Absolute URL to request with GET.
 *
 * @return string Response body as received.
 *
 * @throws \Illuminate\Http\Client\RequestException   When the final response is a client or server error.
 * @throws \Illuminate\Http\Client\ConnectionException When no connection could be established.
 */
private function fetch(string $url): string
{
    // A single return chain; a duplicated second return would be dead code.
    return Http::timeout(30)
        ->retry(2, 300)
        ->get($url)
        ->throw()
        ->body();
}
|
||||
|
||||
private function extractCategories(string $html): array
|
||||
@@ -84,46 +100,102 @@ class HelpdeskImportService
|
||||
return is_array($decoded) ? $decoded : [];
|
||||
}
|
||||
|
||||
private function buildSectionUrls(string $baseUrl, array $categories): array
|
||||
/**
 * Upsert the scraped category tree and map external ids to local ids.
 *
 * Top-level categories are stored with parent_id = null; each child is
 * stored with its parent's local id. Entries missing id, title or slug
 * are ignored. In dry-run mode nothing is written and every mapped value
 * is null.
 *
 * @param array $categories Decoded category list; each entry may carry a 'children' list.
 * @param bool  $dryRun     When true, skip all database writes.
 *
 * @return array<int, int|null> External category id => local Category id (null when not persisted).
 */
private function syncCategories(array $categories, bool $dryRun): array
{
    $map = [];

    foreach ($categories as $category) {
        if (!isset($category['id'], $category['title'], $category['slug'])) {
            continue;
        }

        $externalId = (int) $category['id'];
        $parentId = null;

        if (!$dryRun) {
            $model = Category::query()->updateOrCreate(
                ['external_id' => $externalId],
                [
                    'name' => (string) $category['title'],
                    'slug' => (string) $category['slug'],
                    'parent_id' => null,
                ]
            );
            $parentId = $model->id;
        }

        $map[$externalId] = $parentId;

        // The tree comes from decoded page data, so only iterate children
        // when the value really is a list.
        $children = $category['children'] ?? [];
        if (!is_array($children)) {
            continue;
        }

        foreach ($children as $child) {
            if (!isset($child['id'], $child['title'], $child['slug'])) {
                continue;
            }

            $childExternalId = (int) $child['id'];

            // Without a persisted parent (dry run) the child is only recorded
            // in the map, never written.
            if ($dryRun || $parentId === null) {
                $map[$childExternalId] = null;
                continue;
            }

            $childModel = Category::query()->updateOrCreate(
                ['external_id' => $childExternalId],
                [
                    'name' => (string) $child['title'],
                    'slug' => (string) $child['slug'],
                    'parent_id' => $parentId,
                ]
            );
            $map[$childExternalId] = $childModel->id;
        }
    }

    return $map;
}
|
||||
|
||||
/**
 * Build the list of section pages to crawl, with their category context.
 *
 * Each category and each of its children yields one entry whose URL is
 * "{baseUrl}/{id}/{slug}". A child entry carries its parent's id as
 * category_external_id and its own id as subcategory_external_id.
 * Entries missing id or slug are ignored, and a URL is listed only once
 * (first occurrence wins) so the same page is not fetched twice.
 *
 * @param string $baseUrl    Helpdesk base URL without trailing slash.
 * @param array  $categories Decoded category list; each entry may carry a 'children' list.
 *
 * @return list<array{url: string, category_external_id: int, subcategory_external_id: int|null}>
 */
private function buildSections(string $baseUrl, array $categories): array
{
    $sections = [];
    $seen = [];

    foreach ($categories as $category) {
        if (!isset($category['id'], $category['slug'])) {
            continue;
        }

        $categoryId = (int) $category['id'];
        $url = sprintf('%s/%d/%s', $baseUrl, $categoryId, (string) $category['slug']);

        if (!isset($seen[$url])) {
            $seen[$url] = true;
            $sections[] = [
                'url' => $url,
                'category_external_id' => $categoryId,
                'subcategory_external_id' => null,
            ];
        }

        // Only iterate children when the decoded value really is a list.
        $children = $category['children'] ?? [];
        if (!is_array($children)) {
            continue;
        }

        foreach ($children as $child) {
            if (!isset($child['id'], $child['slug'])) {
                continue;
            }

            $childId = (int) $child['id'];
            $childUrl = sprintf('%s/%d/%s', $baseUrl, $childId, (string) $child['slug']);

            if (isset($seen[$childUrl])) {
                continue;
            }

            $seen[$childUrl] = true;
            $sections[] = [
                'url' => $childUrl,
                'category_external_id' => $categoryId,
                'subcategory_external_id' => $childId,
            ];
        }
    }

    return $sections;
}
|
||||
|
||||
private function collectArticleUrls(string $baseUrl, string $rootHtml, array $sectionUrls): array
|
||||
/**
 * Scan the root page and every section page for article links.
 *
 * The already-downloaded root HTML is scanned first, then each section is
 * fetched (unless it already carries an 'html' entry). A page that cannot
 * be fetched is skipped so one failure does not abort the crawl.
 *
 * Keys keep first-discovery order. Category metadata is upgraded when a
 * later page is more specific: an entry without a category takes the first
 * category that lists it, and an entry without a subcategory takes one from
 * a later page of the same category. Otherwise the first hit wins.
 *
 * @param string $baseUrl  Helpdesk base URL; used as the URL of the root source.
 * @param string $rootHtml HTML of the root page, already downloaded.
 * @param array  $sections Entries with 'url', 'category_external_id' and 'subcategory_external_id'.
 *
 * @return array<string, array{category_external_id: int|null, subcategory_external_id: int|null}>
 *         Lower-cased article URL => category metadata.
 */
private function collectArticleUrls(string $baseUrl, string $rootHtml, array $sections): array
{
    // The root page has no category of its own and brings its HTML along,
    // so it is never fetched a second time.
    $sources = array_merge([
        ['url' => $baseUrl, 'category_external_id' => null, 'subcategory_external_id' => null, 'html' => $rootHtml],
    ], $sections);

    $result = [];

    foreach ($sources as $source) {
        try {
            $html = $source['html'] ?? $this->fetch($source['url']);
        } catch (\Throwable) {
            // Deliberate best effort: an unreachable page is skipped.
            continue;
        }

        preg_match_all('/https:\/\/www\.internettoday\.nl\/helpdesk\/(\d+)-[a-z0-9\-]+/i', $html, $matches);

        $categoryId = $source['category_external_id'] ?? null;
        $subcategoryId = $source['subcategory_external_id'] ?? null;

        foreach (($matches[0] ?? []) as $match) {
            $url = strtolower($match);
            $known = $result[$url] ?? null;

            // New URL, or one seen only on a page without category context
            // (e.g. the root page): record this source's metadata. Assigning
            // to an existing key keeps its original position.
            if ($known === null || $known['category_external_id'] === null) {
                $result[$url] = [
                    'category_external_id' => $categoryId,
                    'subcategory_external_id' => $subcategoryId,
                ];
                continue;
            }

            // Same category, now seen on a subcategory page: fill in the
            // missing subcategory. Never mix ids from different categories.
            if (
                $known['subcategory_external_id'] === null
                && $subcategoryId !== null
                && $known['category_external_id'] === $categoryId
            ) {
                $result[$url]['subcategory_external_id'] = $subcategoryId;
            }
        }
    }

    return $result;
}
|
||||
|
||||
private function parseArticlePage(string $url): ?array
|
||||
@@ -156,9 +228,20 @@ class HelpdeskImportService
|
||||
return null;
|
||||
}
|
||||
|
||||
$content = "Source: {$url}\n\n{$content}";
|
||||
if (!preg_match('/\/helpdesk\/(\d+)-/', $url, $idMatch)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return [$title, Str::limit($content, 64000, '')];
|
||||
return [$title, Str::limit($content, 64000, ''), (int) $idMatch[1]];
|
||||
}
|
||||
|
||||
/**
 * Translate an external category id into the local Category id.
 *
 * The in-memory map is consulted first; when it has no non-null entry for
 * the id, the categories table is queried by external_id.
 *
 * @param int|null $externalId External id, or null when the article has none.
 * @param array    $map        External id => local id (values may be null).
 *
 * @return int|null Local category id, or null when none is given or found.
 */
private function resolveCategoryId(?int $externalId, array $map): ?int
{
    if ($externalId !== null) {
        $mapped = $map[$externalId] ?? null;
        if ($mapped !== null) {
            return $mapped;
        }

        // Not in the map (or mapped to null): fall back to the database.
        return Category::query()
            ->where('external_id', $externalId)
            ->value('id');
    }

    return null;
}
|
||||
|
||||
private function sanitizeText(string $value): string
|
||||
|
||||
Reference in New Issue
Block a user