using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using CRAWLER.Models;
using CRAWLER.Parsing;

namespace CRAWLER.Services;

/// <summary>
/// Coordinates the instrument catalog: local persistence through the repository,
/// PDF attachment storage, and import of catalog/detail pages fetched by the site client.
/// </summary>
// NOTE(review): the reviewed text of this file had its generic type arguments missing
// (e.g. "Task> SearchAsync", "IEnumerable pendingPdfPaths"). They were restored from how
// the values are used in this class; confirm each against the repository/parser signatures.
internal sealed class InstrumentCatalogService
{
    /// <summary>Attachment kind recorded for PDFs the user attached by hand.</summary>
    private const string ManualAttachmentKind = "Ручной PDF";

    /// <summary>Value written to <c>SourceSystem</c> for every imported record.</summary>
    private const string SourceSystemName = "KtoPoverit";

    /// <summary>Kind/title used for the type-description PDF.</summary>
    private const string DescriptionTypeTitle = "Описание типа";

    /// <summary>Kind/title used for the verification-methodology PDF.</summary>
    private const string MethodologyTitle = "Методика поверки";

    /// <summary>Name given to a merged record when no source provides one.</summary>
    private const string UntitledInstrumentName = "Без названия";

    private readonly CatalogPageParser _catalogPageParser;
    private readonly DatabaseInitializer _databaseInitializer;
    private readonly DetailPageParser _detailPageParser;
    private readonly InstrumentRepository _repository;
    private readonly KtoPoveritClient _client;
    private readonly PdfStorageService _pdfStorageService;

    /// <summary>Creates the service from its collaborators; the references are stored as given.</summary>
    public InstrumentCatalogService(
        DatabaseInitializer databaseInitializer,
        InstrumentRepository repository,
        CatalogPageParser catalogPageParser,
        DetailPageParser detailPageParser,
        KtoPoveritClient client,
        PdfStorageService pdfStorageService)
    {
        _databaseInitializer = databaseInitializer;
        _repository = repository;
        _catalogPageParser = catalogPageParser;
        _detailPageParser = detailPageParser;
        _client = client;
        _pdfStorageService = pdfStorageService;
    }

    /// <summary>Configured default number of catalog pages to scan, never less than 1.</summary>
    public int DefaultPagesToScan => Math.Max(1, _client.Options.DefaultPagesToScan);

    /// <summary>Asks the database initializer to ensure the database exists.</summary>
    public async Task InitializeAsync(CancellationToken cancellationToken)
    {
        await _databaseInitializer.EnsureCreatedAsync(cancellationToken);
    }

    /// <summary>Forwards a text search to the repository.</summary>
    // NOTE(review): the element type of the returned list could not be determined from the
    // reviewed text; InstrumentRecord is an assumption — confirm against
    // InstrumentRepository.SearchAsync.
    public Task<IReadOnlyList<InstrumentRecord>> SearchAsync(string searchText, CancellationToken cancellationToken)
    {
        return _repository.SearchAsync(searchText, cancellationToken);
    }

    /// <summary>Forwards a lookup by identifier to the repository.</summary>
    public Task<InstrumentRecord> GetByIdAsync(long id, CancellationToken cancellationToken)
    {
        return _repository.GetByIdAsync(id, cancellationToken);
    }

    /// <summary>
    /// Saves <paramref name="record"/> and then stores each non-blank path in
    /// <paramref name="pendingPdfPaths"/> as a manual PDF attachment of the saved record.
    /// </summary>
    /// <param name="record">Record to save.</param>
    /// <param name="pendingPdfPaths">Local PDF paths to attach; may be null.</param>
    /// <param name="cancellationToken">Token passed to every repository/storage call.</param>
    /// <returns>The identifier returned by the repository save.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="record"/> is null.</exception>
    public async Task<long> SaveInstrumentAsync(InstrumentRecord record, IEnumerable<string> pendingPdfPaths, CancellationToken cancellationToken)
    {
        if (record == null)
        {
            throw new ArgumentNullException(nameof(record));
        }

        var id = await _repository.SaveAsync(record, cancellationToken);

        // Same copy-and-register steps as AddManualAttachmentsAsync (which also handles a
        // null path list), so reuse it instead of keeping a second copy of the loop.
        await AddManualAttachmentsAsync(id, record.RegistryNumber, pendingPdfPaths, cancellationToken);
        return id;
    }

    /// <summary>
    /// Deletes the instrument from the repository and then removes its attachment files
    /// via <c>TryDelete</c>. Does nothing when <paramref name="record"/> is null.
    /// </summary>
    public async Task DeleteInstrumentAsync(InstrumentRecord record, CancellationToken cancellationToken)
    {
        if (record == null)
        {
            return;
        }

        // Snapshot the paths before awaiting so the file cleanup below does not depend on
        // the state of the record's attachment collection after the repository call.
        var localPaths = new List<string>();
        if (record.Attachments != null)
        {
            foreach (var attachment in record.Attachments)
            {
                localPaths.Add(attachment.LocalPath);
            }
        }

        // Delete the row first: if the repository call throws, the files are still on disk
        // and the stored record does not end up pointing at files that no longer exist.
        await _repository.DeleteInstrumentAsync(record.Id, cancellationToken);

        foreach (var localPath in localPaths)
        {
            _pdfStorageService.TryDelete(localPath);
        }
    }

    /// <summary>
    /// Deletes the attachment from the repository and then removes its local file via
    /// <c>TryDelete</c>. Does nothing when <paramref name="attachment"/> is null.
    /// </summary>
    public async Task RemoveAttachmentAsync(PdfAttachment attachment, CancellationToken cancellationToken)
    {
        if (attachment == null)
        {
            return;
        }

        // Row first, file second — a failed repository delete must not cost the user the file.
        await _repository.DeleteAttachmentAsync(attachment.Id, cancellationToken);
        _pdfStorageService.TryDelete(attachment.LocalPath);
    }

    /// <summary>
    /// Copies each non-blank source path into PDF storage and saves it as a manual
    /// attachment of the given instrument.
    /// </summary>
    /// <param name="instrumentId">Identifier of the owning instrument.</param>
    /// <param name="registryNumber">Registry number passed to the storage service.</param>
    /// <param name="sourcePaths">Local PDF paths; null yields an empty result.</param>
    /// <param name="cancellationToken">Token passed to every repository/storage call.</param>
    /// <returns>The attachments that were saved, in input order.</returns>
    public async Task<IReadOnlyList<PdfAttachment>> AddManualAttachmentsAsync(long instrumentId, string registryNumber, IEnumerable<string> sourcePaths, CancellationToken cancellationToken)
    {
        if (sourcePaths == null)
        {
            return Array.Empty<PdfAttachment>();
        }

        var added = new List<PdfAttachment>();
        foreach (var sourcePath in sourcePaths.Where(path => !string.IsNullOrWhiteSpace(path)))
        {
            var title = Path.GetFileNameWithoutExtension(sourcePath);
            var localPath = await _pdfStorageService.CopyFromLocalAsync(sourcePath, registryNumber, title, cancellationToken);

            var attachment = new PdfAttachment
            {
                InstrumentId = instrumentId,
                Kind = ManualAttachmentKind,
                Title = title,
                SourceUrl = null,
                LocalPath = localPath,
                IsManual = true
            };

            await _repository.SaveAttachmentAsync(attachment, cancellationToken);
            added.Add(attachment);
        }

        return added;
    }

    /// <summary>
    /// Reads catalog pages 1..<paramref name="pagesToScan"/> (at least one page), merges
    /// every listed item with its stored record and detail page, saves it and syncs its PDFs.
    /// A failing page or item is counted in the result and skipped.
    /// </summary>
    /// <param name="pagesToScan">Number of catalog pages to read; values below 1 are treated as 1.</param>
    /// <param name="progress">Optional receiver of progress messages; may be null.</param>
    /// <param name="cancellationToken">Token checked before each page and item and passed to all calls.</param>
    /// <returns>Counters describing what was scanned, saved, downloaded and skipped.</returns>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="cancellationToken"/> was cancelled; cancellation is never recorded as a failure.
    /// </exception>
    public async Task<SyncResult> SyncFromSiteAsync(int pagesToScan, IProgress<string> progress, CancellationToken cancellationToken)
    {
        var result = new SyncResult();
        var totalPages = Math.Max(1, pagesToScan);

        for (var page = 1; page <= totalPages; page++)
        {
            cancellationToken.ThrowIfCancellationRequested();
            progress?.Report($"Чтение страницы {page}...");

            IReadOnlyList<CatalogListItem> items;
            try
            {
                var catalogHtml = await _client.GetStringAsync(_client.BuildCatalogPageUrl(page), cancellationToken);
                items = _catalogPageParser.Parse(catalogHtml, _client.Options.BaseUrl);
                result.PagesScanned++;
            }
            catch (Exception ex) when (!IsCallerCancellation(ex, cancellationToken))
            {
                result.FailedPages++;
                progress?.Report($"Страница {page} пропущена: {ex.Message}");
                continue;
            }

            foreach (var item in items)
            {
                cancellationToken.ThrowIfCancellationRequested();
                progress?.Report($"Обработка {item.RegistryNumber ?? item.Name}...");

                try
                {
                    await SyncItemAsync(item, result, cancellationToken);
                }
                catch (Exception ex) when (!IsCallerCancellation(ex, cancellationToken))
                {
                    result.FailedItems++;
                    progress?.Report($"Запись {item.RegistryNumber ?? item.Name} пропущена: {ex.Message}");
                }
            }
        }

        progress?.Report($"Готово: страниц {result.PagesScanned}, записей {result.ProcessedItems}, проблемных записей {result.FailedItems}.");
        return result;
    }

    /// <summary>
    /// Imports one catalog item: loads the stored record (if any) and the detail page,
    /// saves the merged record, syncs both PDFs and applies the configured request delay.
    /// </summary>
    private async Task SyncItemAsync(CatalogListItem item, SyncResult result, CancellationToken cancellationToken)
    {
        var existingId = await _repository.FindInstrumentIdByRegistryNumberAsync(item.RegistryNumber, cancellationToken);
        var existing = existingId.HasValue
            ? await _repository.GetByIdAsync(existingId.Value, cancellationToken)
            : null;

        ParsedInstrumentDetails details = null;
        if (!string.IsNullOrWhiteSpace(item.DetailUrl))
        {
            try
            {
                var detailHtml = await _client.GetStringAsync(item.DetailUrl, cancellationToken);
                details = _detailPageParser.Parse(detailHtml, _client.Options.BaseUrl);
            }
            catch (Exception ex) when (!IsCallerCancellation(ex, cancellationToken))
            {
                // Best effort: the item is still saved from the catalog row alone.
                result.SkippedDetailRequests++;
            }
        }

        var merged = Merge(existing, item, details);
        merged.Id = existing?.Id ?? 0;
        merged.SourceSystem = SourceSystemName;
        merged.DetailUrl = item.DetailUrl ?? existing?.DetailUrl;
        merged.LastImportedAt = DateTime.UtcNow;

        var savedId = await _repository.SaveAsync(merged, cancellationToken);
        result.ProcessedItems++;
        if (existing == null)
        {
            result.AddedRecords++;
        }
        else
        {
            result.UpdatedRecords++;
        }

        await SyncAttachmentAsync(savedId, merged.RegistryNumber, DescriptionTypeTitle, details?.DescriptionTypePdfUrl ?? item.DescriptionTypePdfUrl, result, cancellationToken);
        await SyncAttachmentAsync(savedId, merged.RegistryNumber, MethodologyTitle, details?.MethodologyPdfUrl ?? item.MethodologyPdfUrl, result, cancellationToken);

        var delayMilliseconds = _client.Options.RequestDelayMilliseconds;
        if (delayMilliseconds > 0)
        {
            await Task.Delay(delayMilliseconds, cancellationToken);
        }
    }

    /// <summary>
    /// Ensures the PDF at <paramref name="sourceUrl"/> is downloaded and registered for the
    /// instrument. Skips blank URLs and attachments whose local file already exists. When
    /// the download fails and no attachment row exists yet, a row without a local path is saved.
    /// </summary>
    private async Task SyncAttachmentAsync(long instrumentId, string registryNumber, string title, string sourceUrl, SyncResult result, CancellationToken cancellationToken)
    {
        if (string.IsNullOrWhiteSpace(sourceUrl))
        {
            return;
        }

        var existing = await _repository.FindAttachmentBySourceUrlAsync(instrumentId, sourceUrl, cancellationToken);
        var alreadyDownloaded = existing != null
            && !string.IsNullOrWhiteSpace(existing.LocalPath)
            && File.Exists(existing.LocalPath);
        if (alreadyDownloaded)
        {
            return;
        }

        try
        {
            var localPath = await _pdfStorageService.DownloadAsync(sourceUrl, registryNumber, title, cancellationToken);

            var attachment = existing ?? new PdfAttachment { InstrumentId = instrumentId, IsManual = false };
            attachment.Kind = title;
            attachment.Title = title;
            attachment.SourceUrl = sourceUrl;
            attachment.LocalPath = localPath;

            await _repository.SaveAttachmentAsync(attachment, cancellationToken);
            result.DownloadedPdfFiles++;
        }
        catch (Exception ex) when (!IsCallerCancellation(ex, cancellationToken))
        {
            result.FailedPdfFiles++;
            if (existing == null)
            {
                // Keep a row that carries the source URL but no local file.
                var placeholder = new PdfAttachment
                {
                    InstrumentId = instrumentId,
                    Kind = title,
                    Title = title,
                    SourceUrl = sourceUrl,
                    LocalPath = null,
                    IsManual = false
                };
                await _repository.SaveAttachmentAsync(placeholder, cancellationToken);
            }
        }
    }

    /// <summary>
    /// Returns true when <paramref name="exception"/> is an <see cref="OperationCanceledException"/>
    /// and the caller's token is cancelled. Used as an exception filter so that such
    /// exceptions propagate instead of being counted as page/item/PDF failures; an
    /// <see cref="OperationCanceledException"/> raised while the token is not cancelled is
    /// still handled as an ordinary failure.
    /// </summary>
    private static bool IsCallerCancellation(Exception exception, CancellationToken cancellationToken)
    {
        return exception is OperationCanceledException && cancellationToken.IsCancellationRequested;
    }

    /// <summary>
    /// Builds the record to save. Starts from a clone of <paramref name="existing"/> (or a new
    /// record) and fills each field with the first non-blank value, preferring the detail page,
    /// then the catalog row (for the fields it carries), then the stored record.
    /// </summary>
    private static InstrumentRecord Merge(InstrumentRecord existing, CatalogListItem item, ParsedInstrumentDetails details)
    {
        var result = existing?.Clone() ?? new InstrumentRecord();

        // Fields available from the detail page, the catalog row and the stored record.
        result.RegistryNumber = Prefer(details?.RegistryNumber, item?.RegistryNumber, existing?.RegistryNumber);
        result.Name = Prefer(details?.Name, item?.Name, existing?.Name) ?? UntitledInstrumentName;
        result.TypeDesignation = Prefer(details?.TypeDesignation, item?.TypeDesignation, existing?.TypeDesignation);
        result.Manufacturer = Prefer(details?.Manufacturer, item?.Manufacturer, existing?.Manufacturer);
        result.VerificationInterval = Prefer(details?.VerificationInterval, item?.VerificationInterval, existing?.VerificationInterval);
        result.CertificateOrSerialNumber = Prefer(details?.CertificateOrSerialNumber, item?.CertificateOrSerialNumber, existing?.CertificateOrSerialNumber);

        // Fields taken only from the detail page, falling back to the stored record.
        result.AllowsBatchVerification = Prefer(details?.AllowsBatchVerification, existing?.AllowsBatchVerification);
        result.HasPeriodicVerification = Prefer(details?.HasPeriodicVerification, existing?.HasPeriodicVerification);
        result.TypeInfo = Prefer(details?.TypeInfo, existing?.TypeInfo);
        result.Purpose = Prefer(details?.Purpose, existing?.Purpose);
        result.Description = Prefer(details?.Description, existing?.Description);
        result.Software = Prefer(details?.Software, existing?.Software);
        result.MetrologicalCharacteristics = Prefer(details?.MetrologicalCharacteristics, existing?.MetrologicalCharacteristics);
        result.Completeness = Prefer(details?.Completeness, existing?.Completeness);
        result.Verification = Prefer(details?.Verification, existing?.Verification);
        result.RegulatoryDocuments = Prefer(details?.RegulatoryDocuments, existing?.RegulatoryDocuments);
        result.Applicant = Prefer(details?.Applicant, existing?.Applicant);
        result.TestCenter = Prefer(details?.TestCenter, existing?.TestCenter);

        return result;
    }

    /// <summary>
    /// Returns the first value that is not null, empty or whitespace, trimmed;
    /// null when there is no such value.
    /// </summary>
    private static string Prefer(params string[] values)
    {
        if (values == null)
        {
            return null;
        }

        foreach (var value in values)
        {
            if (!string.IsNullOrWhiteSpace(value))
            {
                return value.Trim();
            }
        }

        return null;
    }
}