// C# source file: 307 lines, 12 KiB.
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using CRAWLER.Models;
|
|
using CRAWLER.Parsing;
|
|
|
|
namespace CRAWLER.Services;
|
|
|
|
internal sealed class InstrumentCatalogService
|
|
{
|
|
// Collaborators supplied through the constructor; they are assigned once and never replaced.
private readonly CatalogPageParser _catalogPageParser;
private readonly DatabaseInitializer _databaseInitializer;
private readonly DetailPageParser _detailPageParser;
private readonly InstrumentRepository _repository;
private readonly KtoPoveritClient _client;
private readonly PdfStorageService _pdfStorageService;

/// <summary>
/// Creates the service and stores the collaborators it delegates to.
/// </summary>
/// <param name="databaseInitializer">Used by <c>InitializeAsync</c> to create the database.</param>
/// <param name="repository">Storage for instrument records and their PDF attachments.</param>
/// <param name="catalogPageParser">Parser for catalog listing pages.</param>
/// <param name="detailPageParser">Parser for instrument detail pages.</param>
/// <param name="client">HTTP client for the source site; also exposes the crawl options.</param>
/// <param name="pdfStorageService">Copies, downloads and deletes PDF files.</param>
/// <exception cref="ArgumentNullException">Any of the dependencies is <c>null</c>.</exception>
public InstrumentCatalogService(
    DatabaseInitializer databaseInitializer,
    InstrumentRepository repository,
    CatalogPageParser catalogPageParser,
    DetailPageParser detailPageParser,
    KtoPoveritClient client,
    PdfStorageService pdfStorageService)
{
    // Fail at construction time rather than with a NullReferenceException on first use.
    _databaseInitializer = databaseInitializer ?? throw new ArgumentNullException(nameof(databaseInitializer));
    _repository = repository ?? throw new ArgumentNullException(nameof(repository));
    _catalogPageParser = catalogPageParser ?? throw new ArgumentNullException(nameof(catalogPageParser));
    _detailPageParser = detailPageParser ?? throw new ArgumentNullException(nameof(detailPageParser));
    _client = client ?? throw new ArgumentNullException(nameof(client));
    _pdfStorageService = pdfStorageService ?? throw new ArgumentNullException(nameof(pdfStorageService));
}
|
|
|
|
/// <summary>
/// Default number of catalog pages to scan: the value from the client's options,
/// raised to 1 when the configured value is lower.
/// </summary>
public int DefaultPagesToScan => Math.Max(1, _client.Options.DefaultPagesToScan);
|
|
|
|
/// <summary>
/// Awaits the database initializer's <c>EnsureCreatedAsync</c>.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the initializer.</param>
public async Task InitializeAsync(CancellationToken cancellationToken)
    => await _databaseInitializer.EnsureCreatedAsync(cancellationToken);
|
|
|
|
/// <summary>
/// Forwards a search to the repository and returns its task unchanged.
/// </summary>
/// <param name="searchText">Text passed to the repository search as-is.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
/// <returns>The repository's list of matching instrument summaries.</returns>
public Task<IReadOnlyList<InstrumentSummary>> SearchAsync(string searchText, CancellationToken cancellationToken)
    => _repository.SearchAsync(searchText, cancellationToken);
|
|
|
|
/// <summary>
/// Forwards a lookup by identifier to the repository and returns its task unchanged.
/// </summary>
/// <param name="id">Identifier of the instrument record.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
/// <returns>Whatever the repository returns for <paramref name="id"/>.</returns>
public Task<InstrumentRecord> GetByIdAsync(long id, CancellationToken cancellationToken)
    => _repository.GetByIdAsync(id, cancellationToken);
|
|
|
|
/// <summary>
/// Saves an instrument record and then attaches the manually selected PDF files to it.
/// </summary>
/// <param name="record">Record to save; its <c>RegistryNumber</c> is passed on when copying the PDFs.</param>
/// <param name="pendingPdfPaths">Local paths of PDFs to attach; may be <c>null</c>, blank entries are skipped.</param>
/// <param name="cancellationToken">Token forwarded to the repository and the PDF storage.</param>
/// <returns>The identifier returned by the repository for the saved record.</returns>
/// <exception cref="ArgumentNullException"><paramref name="record"/> is <c>null</c>.</exception>
public async Task<long> SaveInstrumentAsync(InstrumentRecord record, IEnumerable<string> pendingPdfPaths, CancellationToken cancellationToken)
{
    if (record == null)
    {
        throw new ArgumentNullException(nameof(record));
    }

    var id = await _repository.SaveAsync(record, cancellationToken);

    // Single code path for manual attachments: AddManualAttachmentsAsync already returns
    // early for a null list, skips blank paths and writes the same attachment fields
    // this method used to build inline.
    await AddManualAttachmentsAsync(id, record.RegistryNumber, pendingPdfPaths, cancellationToken);

    return id;
}
|
|
|
|
/// <summary>
/// Deletes an instrument from the repository and then removes the local PDF files of its attachments.
/// </summary>
/// <param name="record">Record to delete; a <c>null</c> record is ignored.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
public async Task DeleteInstrumentAsync(InstrumentRecord record, CancellationToken cancellationToken)
{
    if (record == null)
    {
        return;
    }

    // Collect the file paths up front. Attachments without a local path (for example
    // placeholders stored after a failed download) have nothing on disk to remove.
    var localPaths = new List<string>();
    if (record.Attachments != null)
    {
        foreach (var attachment in record.Attachments)
        {
            if (!string.IsNullOrWhiteSpace(attachment?.LocalPath))
            {
                localPaths.Add(attachment.LocalPath);
            }
        }
    }

    // Delete the database row first: if this throws, the files are still on disk and the
    // record stays consistent, instead of pointing at files that were already removed.
    await _repository.DeleteInstrumentAsync(record.Id, cancellationToken);

    foreach (var localPath in localPaths)
    {
        _pdfStorageService.TryDelete(localPath);
    }
}
|
|
|
|
/// <summary>
/// Deletes one attachment from the repository and then removes its local PDF file.
/// </summary>
/// <param name="attachment">Attachment to remove; a <c>null</c> attachment is ignored.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
public async Task RemoveAttachmentAsync(PdfAttachment attachment, CancellationToken cancellationToken)
{
    if (attachment == null)
    {
        return;
    }

    // Row first, file second: a failed database delete must not leave a row whose file is gone.
    await _repository.DeleteAttachmentAsync(attachment.Id, cancellationToken);

    // Attachments stored without a local path have no file to delete.
    if (!string.IsNullOrWhiteSpace(attachment.LocalPath))
    {
        _pdfStorageService.TryDelete(attachment.LocalPath);
    }
}
|
|
|
|
/// <summary>
/// Copies user-selected PDF files into storage and saves one manual attachment per file.
/// </summary>
/// <param name="instrumentId">Identifier written to each attachment's <c>InstrumentId</c>.</param>
/// <param name="registryNumber">Registry number passed to the PDF storage when copying.</param>
/// <param name="sourcePaths">Local file paths; <c>null</c> yields an empty result, blank entries are skipped.</param>
/// <param name="cancellationToken">Token forwarded to the PDF storage and the repository.</param>
/// <returns>The attachments that were saved, in the order they were processed.</returns>
public async Task<IReadOnlyList<PdfAttachment>> AddManualAttachmentsAsync(long instrumentId, string registryNumber, IEnumerable<string> sourcePaths, CancellationToken cancellationToken)
{
    if (sourcePaths == null)
    {
        return Array.Empty<PdfAttachment>();
    }

    var saved = new List<PdfAttachment>();

    foreach (var candidate in sourcePaths)
    {
        if (string.IsNullOrWhiteSpace(candidate))
        {
            continue;
        }

        // The file name without extension serves both as the storage name hint and as the title.
        var title = Path.GetFileNameWithoutExtension(candidate);
        var storedPath = await _pdfStorageService.CopyFromLocalAsync(candidate, registryNumber, title, cancellationToken);

        var manualPdf = new PdfAttachment
        {
            InstrumentId = instrumentId,
            Kind = "Ручной PDF",
            Title = title,
            SourceUrl = null,
            LocalPath = storedPath,
            IsManual = true
        };

        await _repository.SaveAttachmentAsync(manualPdf, cancellationToken);
        saved.Add(manualPdf);
    }

    return saved;
}
|
|
|
|
/// <summary>
/// Scans catalog pages on the source site and saves every listed instrument, together with
/// its detail-page data and PDF documents, into the repository.
/// </summary>
/// <param name="pagesToScan">Number of catalog pages to read, starting at page 1; values below 1 are treated as 1.</param>
/// <param name="progress">Optional sink for human-readable progress messages.</param>
/// <param name="cancellationToken">Token checked before every page and item and forwarded to all I/O.</param>
/// <returns>Counters describing what was scanned, added, updated, skipped or failed.</returns>
/// <exception cref="OperationCanceledException">
/// <paramref name="cancellationToken"/> was cancelled. Cancellation is never counted as a failed page or item.
/// </exception>
/// <remarks>
/// A page or item that fails for any other reason is counted, reported through
/// <paramref name="progress"/> and skipped; the scan continues with the next one.
/// </remarks>
public async Task<SyncResult> SyncFromSiteAsync(int pagesToScan, IProgress<string> progress, CancellationToken cancellationToken)
{
    var result = new SyncResult();
    var totalPages = Math.Max(1, pagesToScan);

    for (var page = 1; page <= totalPages; page++)
    {
        cancellationToken.ThrowIfCancellationRequested();
        progress?.Report($"Чтение страницы {page}...");

        IReadOnlyList<CatalogListItem> items;
        try
        {
            var catalogHtml = await _client.GetStringAsync(_client.BuildCatalogPageUrl(page), cancellationToken);
            items = _catalogPageParser.Parse(catalogHtml, _client.Options.BaseUrl);
            result.PagesScanned++;
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // The caller asked to stop. Timeouts also surface as OperationCanceledException
            // but leave the token untouched, so they still fall through to the handler below.
            throw;
        }
        catch (Exception ex)
        {
            result.FailedPages++;
            progress?.Report($"Страница {page} пропущена: {ex.Message}");
            continue;
        }

        if (items == null)
        {
            // Defensive: a parser result of null is treated as a page without rows.
            continue;
        }

        foreach (var item in items)
        {
            cancellationToken.ThrowIfCancellationRequested();
            progress?.Report($"Обработка {item.RegistryNumber ?? item.Name}...");

            try
            {
                await ProcessCatalogItemAsync(item, result, cancellationToken);
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                throw;
            }
            catch (Exception ex)
            {
                result.FailedItems++;
                progress?.Report($"Запись {item.RegistryNumber ?? item.Name} пропущена: {ex.Message}");
            }
        }
    }

    progress?.Report($"Готово: страниц {result.PagesScanned}, записей {result.ProcessedItems}, проблемных записей {result.FailedItems}.");
    return result;
}

/// <summary>
/// Imports one catalog row: merges it with the stored record and the detail page, saves the
/// result, syncs both PDF documents and finally applies the configured request delay.
/// </summary>
private async Task ProcessCatalogItemAsync(CatalogListItem item, SyncResult result, CancellationToken cancellationToken)
{
    var existingId = await _repository.FindInstrumentIdByRegistryNumberAsync(item.RegistryNumber, cancellationToken);
    var existing = existingId.HasValue
        ? await _repository.GetByIdAsync(existingId.Value, cancellationToken)
        : null;

    var details = await TryLoadDetailsAsync(item, result, cancellationToken);

    var merged = Merge(existing, item, details);
    merged.Id = existing?.Id ?? 0;
    merged.SourceSystem = "KtoPoverit";
    // Prefer (not ??) so that an empty or blank URL does not overwrite a stored one.
    merged.DetailUrl = Prefer(item.DetailUrl, existing?.DetailUrl);
    merged.LastImportedAt = DateTime.UtcNow;

    var savedId = await _repository.SaveAsync(merged, cancellationToken);
    result.ProcessedItems++;
    if (existing == null)
    {
        result.AddedRecords++;
    }
    else
    {
        result.UpdatedRecords++;
    }

    // A blank URL on the detail page falls back to the one from the catalog row.
    var descriptionTypePdfUrl = Prefer(details?.DescriptionTypePdfUrl, item.DescriptionTypePdfUrl);
    var methodologyPdfUrl = Prefer(details?.MethodologyPdfUrl, item.MethodologyPdfUrl);

    await SyncAttachmentAsync(savedId, merged.RegistryNumber, "Описание типа", descriptionTypePdfUrl, result, cancellationToken);
    await SyncAttachmentAsync(savedId, merged.RegistryNumber, "Методика поверки", methodologyPdfUrl, result, cancellationToken);

    if (_client.Options.RequestDelayMilliseconds > 0)
    {
        await Task.Delay(_client.Options.RequestDelayMilliseconds, cancellationToken);
    }
}

/// <summary>
/// Downloads and parses the detail page of a catalog row. Returns <c>null</c> when the row
/// has no detail URL or when loading fails; a failure increments <c>SkippedDetailRequests</c>.
/// </summary>
private async Task<ParsedInstrumentDetails> TryLoadDetailsAsync(CatalogListItem item, SyncResult result, CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(item.DetailUrl))
    {
        return null;
    }

    try
    {
        var detailHtml = await _client.GetStringAsync(item.DetailUrl, cancellationToken);
        return _detailPageParser.Parse(detailHtml, _client.Options.BaseUrl);
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        throw;
    }
    catch
    {
        // Best effort: the row is still imported from the catalog data alone.
        result.SkippedDetailRequests++;
        return null;
    }
}
|
|
|
|
/// <summary>
/// Makes sure the PDF behind <paramref name="sourceUrl"/> is downloaded and recorded as an
/// attachment of the given instrument.
/// </summary>
/// <param name="instrumentId">Instrument the attachment belongs to.</param>
/// <param name="registryNumber">Registry number passed to the PDF storage when downloading.</param>
/// <param name="title">Stored as both <c>Kind</c> and <c>Title</c> of the attachment.</param>
/// <param name="sourceUrl">URL of the PDF; a blank value makes the call a no-op.</param>
/// <param name="result">Counters updated with the download outcome.</param>
/// <param name="cancellationToken">Token forwarded to the repository and the PDF storage.</param>
/// <exception cref="OperationCanceledException">
/// <paramref name="cancellationToken"/> was cancelled; this is not counted as a failed PDF.
/// </exception>
private async Task SyncAttachmentAsync(long instrumentId, string registryNumber, string title, string sourceUrl, SyncResult result, CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(sourceUrl))
    {
        return;
    }

    var existing = await _repository.FindAttachmentBySourceUrlAsync(instrumentId, sourceUrl, cancellationToken);

    // Already stored and the file is still on disk: nothing to do.
    if (existing != null && !string.IsNullOrWhiteSpace(existing.LocalPath) && File.Exists(existing.LocalPath))
    {
        return;
    }

    try
    {
        var localPath = await _pdfStorageService.DownloadAsync(sourceUrl, registryNumber, title, cancellationToken);

        // Reuse the stored row when there is one (its file is missing), otherwise create a new row.
        var attachment = existing ?? new PdfAttachment
        {
            InstrumentId = instrumentId,
            IsManual = false
        };

        attachment.Kind = title;
        attachment.Title = title;
        attachment.SourceUrl = sourceUrl;
        attachment.LocalPath = localPath;

        await _repository.SaveAttachmentAsync(attachment, cancellationToken);
        result.DownloadedPdfFiles++;
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // A user-requested stop is not a failed download, and writing the placeholder
        // below with an already-cancelled token would be pointless.
        throw;
    }
    catch
    {
        result.FailedPdfFiles++;

        // Remember the URL without a local file; the next sync finds this row by its
        // source URL, sees no file on disk and tries the download again.
        if (existing == null)
        {
            await _repository.SaveAttachmentAsync(new PdfAttachment
            {
                InstrumentId = instrumentId,
                Kind = title,
                Title = title,
                SourceUrl = sourceUrl,
                LocalPath = null,
                IsManual = false
            }, cancellationToken);
        }
    }
}
|
|
|
|
/// <summary>
/// Builds the record to store for one catalog entry. Each field takes the first non-blank
/// value, in the order detail page, catalog row, stored record.
/// </summary>
/// <param name="existing">Previously stored record, or <c>null</c> for a new instrument.</param>
/// <param name="item">Row from the catalog listing; may be <c>null</c>.</param>
/// <param name="details">Parsed detail page; may be <c>null</c>.</param>
/// <returns>
/// A clone of <paramref name="existing"/> (or a new record) with the fields below overwritten.
/// Properties not assigned here keep whatever the clone carried.
/// </returns>
private static InstrumentRecord Merge(InstrumentRecord existing, CatalogListItem item, ParsedInstrumentDetails details)
{
    var merged = existing?.Clone();
    if (merged == null)
    {
        merged = new InstrumentRecord();
    }

    // Fields available from all three sources.
    merged.RegistryNumber = Prefer(details?.RegistryNumber, item?.RegistryNumber, existing?.RegistryNumber);

    var name = Prefer(details?.Name, item?.Name, existing?.Name);
    merged.Name = name ?? "Без названия";

    merged.TypeDesignation = Prefer(details?.TypeDesignation, item?.TypeDesignation, existing?.TypeDesignation);
    merged.Manufacturer = Prefer(details?.Manufacturer, item?.Manufacturer, existing?.Manufacturer);
    merged.VerificationInterval = Prefer(details?.VerificationInterval, item?.VerificationInterval, existing?.VerificationInterval);
    merged.CertificateOrSerialNumber = Prefer(details?.CertificateOrSerialNumber, item?.CertificateOrSerialNumber, existing?.CertificateOrSerialNumber);

    // Fields taken only from the detail page, falling back to the stored record.
    merged.AllowsBatchVerification = Prefer(details?.AllowsBatchVerification, existing?.AllowsBatchVerification);
    merged.HasPeriodicVerification = Prefer(details?.HasPeriodicVerification, existing?.HasPeriodicVerification);
    merged.TypeInfo = Prefer(details?.TypeInfo, existing?.TypeInfo);
    merged.Purpose = Prefer(details?.Purpose, existing?.Purpose);
    merged.Description = Prefer(details?.Description, existing?.Description);
    merged.Software = Prefer(details?.Software, existing?.Software);
    merged.MetrologicalCharacteristics = Prefer(details?.MetrologicalCharacteristics, existing?.MetrologicalCharacteristics);
    merged.Completeness = Prefer(details?.Completeness, existing?.Completeness);
    merged.Verification = Prefer(details?.Verification, existing?.Verification);
    merged.RegulatoryDocuments = Prefer(details?.RegulatoryDocuments, existing?.RegulatoryDocuments);
    merged.Applicant = Prefer(details?.Applicant, existing?.Applicant);
    merged.TestCenter = Prefer(details?.TestCenter, existing?.TestCenter);

    return merged;
}
|
|
|
|
/// <summary>
/// Returns the first candidate that is not null, empty or whitespace, with surrounding
/// whitespace trimmed.
/// </summary>
/// <param name="values">Candidates in priority order.</param>
/// <returns>The trimmed first non-blank candidate, or <c>null</c> when there is none.</returns>
private static string Prefer(params string[] values)
{
    // Prefer(null) binds the literal to the array itself rather than to a single element.
    if (values == null)
    {
        return null;
    }

    foreach (var value in values)
    {
        if (!string.IsNullOrWhiteSpace(value))
        {
            return value.Trim();
        }
    }

    return null;
}
|
|
}
|