Добавьте файлы проекта.
This commit is contained in:
306
Services/InstrumentCatalogService.cs
Normal file
306
Services/InstrumentCatalogService.cs
Normal file
@@ -0,0 +1,306 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using CRAWLER.Models;
|
||||
using CRAWLER.Parsing;
|
||||
|
||||
namespace CRAWLER.Services;
|
||||
|
||||
internal sealed class InstrumentCatalogService
|
||||
{
|
||||
// Collaborators handed in through the constructor; they are never reassigned afterwards.
private readonly DatabaseInitializer _databaseInitializer;
private readonly InstrumentRepository _repository;
private readonly CatalogPageParser _catalogPageParser;
private readonly DetailPageParser _detailPageParser;
private readonly KtoPoveritClient _client;
private readonly PdfStorageService _pdfStorageService;

/// <summary>
/// Creates the service and stores the supplied collaborators in read-only fields.
/// </summary>
/// <param name="databaseInitializer">Used by <see cref="InitializeAsync"/> to make sure the database exists.</param>
/// <param name="repository">Storage for instrument records and their PDF attachments.</param>
/// <param name="catalogPageParser">Parses the HTML of a catalog listing page into list items.</param>
/// <param name="detailPageParser">Parses the HTML of an instrument detail page.</param>
/// <param name="client">Fetches pages from the site and exposes the crawl options.</param>
/// <param name="pdfStorageService">Copies, downloads and deletes local PDF files.</param>
public InstrumentCatalogService(
    DatabaseInitializer databaseInitializer,
    InstrumentRepository repository,
    CatalogPageParser catalogPageParser,
    DetailPageParser detailPageParser,
    KtoPoveritClient client,
    PdfStorageService pdfStorageService)
{
    (_databaseInitializer, _repository, _catalogPageParser) = (databaseInitializer, repository, catalogPageParser);
    (_detailPageParser, _client, _pdfStorageService) = (detailPageParser, client, pdfStorageService);
}
|
||||
|
||||
/// <summary>
/// Gets the default number of catalog pages to scan: the value configured in the
/// client options, but never less than 1.
/// </summary>
public int DefaultPagesToScan => Math.Max(1, _client.Options.DefaultPagesToScan);
|
||||
|
||||
/// <summary>
/// Prepares the service for use by asking the database initializer to ensure the database is created.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the database initializer.</param>
/// <returns>A task that completes when the initializer has finished.</returns>
public async Task InitializeAsync(CancellationToken cancellationToken) =>
    await _databaseInitializer.EnsureCreatedAsync(cancellationToken);
|
||||
|
||||
/// <summary>
/// Searches stored instruments; the call is forwarded unchanged to the repository.
/// </summary>
/// <param name="searchText">Search text passed to the repository as-is.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
/// <returns>The repository's task producing the matching instrument summaries.</returns>
public Task<IReadOnlyList<InstrumentSummary>> SearchAsync(string searchText, CancellationToken cancellationToken) =>
    _repository.SearchAsync(searchText, cancellationToken);
|
||||
|
||||
/// <summary>
/// Loads a single instrument record; the call is forwarded unchanged to the repository.
/// </summary>
/// <param name="id">Identifier of the instrument to load.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
/// <returns>The repository's task producing the instrument record.</returns>
public Task<InstrumentRecord> GetByIdAsync(long id, CancellationToken cancellationToken) =>
    _repository.GetByIdAsync(id, cancellationToken);
|
||||
|
||||
/// <summary>
/// Saves an instrument record and then attaches any pending manually selected PDF files to it.
/// </summary>
/// <param name="record">Instrument to save; its <c>RegistryNumber</c> is passed to PDF storage when files are attached.</param>
/// <param name="pendingPdfPaths">Local paths of PDF files to attach; may be <c>null</c>. Blank entries are skipped.</param>
/// <param name="cancellationToken">Token forwarded to the repository and the PDF storage.</param>
/// <returns>The identifier returned by the repository for the saved record.</returns>
public async Task<long> SaveInstrumentAsync(InstrumentRecord record, IEnumerable<string> pendingPdfPaths, CancellationToken cancellationToken)
{
    var id = await _repository.SaveAsync(record, cancellationToken);

    if (pendingPdfPaths != null)
    {
        // Attaching manual PDFs is the same operation AddManualAttachmentsAsync performs;
        // delegate so the copy-and-save logic lives in a single place.
        await AddManualAttachmentsAsync(id, record.RegistryNumber, pendingPdfPaths, cancellationToken);
    }

    return id;
}
|
||||
|
||||
/// <summary>
/// Deletes an instrument from the repository and then removes the local files of its attachments.
/// </summary>
/// <param name="record">Instrument to delete; <c>null</c> is ignored.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
/// <returns>A task that completes once the repository delete and the file clean-up have run.</returns>
public async Task DeleteInstrumentAsync(InstrumentRecord record, CancellationToken cancellationToken)
{
    if (record == null)
    {
        return;
    }

    // Delete from the repository first: if that call throws or is cancelled, the files are
    // still on disk and the surviving record does not point at files that no longer exist.
    await _repository.DeleteInstrumentAsync(record.Id, cancellationToken);

    if (record.Attachments == null)
    {
        return;
    }

    foreach (var attachment in record.Attachments)
    {
        _pdfStorageService.TryDelete(attachment.LocalPath);
    }
}
|
||||
|
||||
/// <summary>
/// Removes a single PDF attachment from the repository and then deletes its local file.
/// </summary>
/// <param name="attachment">Attachment to remove; <c>null</c> is ignored.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
/// <returns>A task that completes once the repository delete and the file clean-up have run.</returns>
public async Task RemoveAttachmentAsync(PdfAttachment attachment, CancellationToken cancellationToken)
{
    if (attachment == null)
    {
        return;
    }

    // Delete from the repository first: if that call throws or is cancelled, the file is
    // still on disk and the surviving attachment entry keeps a valid local path.
    await _repository.DeleteAttachmentAsync(attachment.Id, cancellationToken);
    _pdfStorageService.TryDelete(attachment.LocalPath);
}
|
||||
|
||||
/// <summary>
/// Copies the given local PDF files into PDF storage and saves one manual attachment per file
/// for the specified instrument.
/// </summary>
/// <param name="instrumentId">Identifier of the instrument the attachments belong to.</param>
/// <param name="registryNumber">Registry number passed to PDF storage when copying each file.</param>
/// <param name="sourcePaths">Local paths of the files to attach; <c>null</c> yields an empty result and blank entries are skipped.</param>
/// <param name="cancellationToken">Token forwarded to the PDF storage and the repository.</param>
/// <returns>The attachments that were saved, in the order they were processed.</returns>
public async Task<IReadOnlyList<PdfAttachment>> AddManualAttachmentsAsync(long instrumentId, string registryNumber, IEnumerable<string> sourcePaths, CancellationToken cancellationToken)
{
    if (sourcePaths is null)
    {
        return Array.Empty<PdfAttachment>();
    }

    var saved = new List<PdfAttachment>();

    foreach (var path in sourcePaths)
    {
        if (string.IsNullOrWhiteSpace(path))
        {
            continue;
        }

        // The file name without extension serves both as the storage title and the attachment title.
        var title = Path.GetFileNameWithoutExtension(path);
        var storedPath = await _pdfStorageService.CopyFromLocalAsync(path, registryNumber, title, cancellationToken);

        var attachment = new PdfAttachment
        {
            InstrumentId = instrumentId,
            Kind = "Ручной PDF",
            Title = title,
            SourceUrl = null,
            LocalPath = storedPath,
            IsManual = true
        };

        await _repository.SaveAttachmentAsync(attachment, cancellationToken);
        saved.Add(attachment);
    }

    return saved;
}
|
||||
|
||||
/// <summary>
/// Scans catalog pages on the site, merges every listed instrument with its detail page and
/// the stored record, saves the result and synchronizes the two PDF attachments.
/// </summary>
/// <remarks>
/// Failures of a single page or a single item are counted in the result and reported through
/// <paramref name="progress"/>; the scan then continues. Cancellation requested through
/// <paramref name="cancellationToken"/> is never treated as such a failure and always propagates.
/// </remarks>
/// <param name="pagesToScan">Number of catalog pages to read; values below 1 are treated as 1.</param>
/// <param name="progress">Optional sink for human-readable progress messages; may be <c>null</c>.</param>
/// <param name="cancellationToken">Token checked before every page and item and forwarded to all awaited calls.</param>
/// <returns>Counters describing what was scanned, added, updated, downloaded and skipped.</returns>
/// <exception cref="OperationCanceledException">Cancellation was requested through <paramref name="cancellationToken"/>.</exception>
public async Task<SyncResult> SyncFromSiteAsync(int pagesToScan, IProgress<string> progress, CancellationToken cancellationToken)
{
    var result = new SyncResult();
    var totalPages = Math.Max(1, pagesToScan);

    for (var page = 1; page <= totalPages; page++)
    {
        cancellationToken.ThrowIfCancellationRequested();
        progress?.Report($"Чтение страницы {page}...");

        IReadOnlyList<CatalogListItem> items;
        try
        {
            var catalogHtml = await _client.GetStringAsync(_client.BuildCatalogPageUrl(page), cancellationToken);
            items = _catalogPageParser.Parse(catalogHtml, _client.Options.BaseUrl);
            result.PagesScanned++;
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // The caller asked to stop: do not count this as a failed page.
            throw;
        }
        catch (Exception ex)
        {
            result.FailedPages++;
            progress?.Report($"Страница {page} пропущена: {ex.Message}");
            continue;
        }

        foreach (var item in items)
        {
            cancellationToken.ThrowIfCancellationRequested();
            progress?.Report($"Обработка {item.RegistryNumber ?? item.Name}...");

            try
            {
                await ImportCatalogItemAsync(item, result, cancellationToken);
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // The caller asked to stop: do not count this as a failed item.
                throw;
            }
            catch (Exception ex)
            {
                result.FailedItems++;
                progress?.Report($"Запись {item.RegistryNumber ?? item.Name} пропущена: {ex.Message}");
                continue;
            }

            // Pause only after a successfully processed item. The delay sits outside the try
            // block so that cancelling it cannot mark an already saved item as failed.
            if (_client.Options.RequestDelayMilliseconds > 0)
            {
                await Task.Delay(_client.Options.RequestDelayMilliseconds, cancellationToken);
            }
        }
    }

    progress?.Report($"Готово: страниц {result.PagesScanned}, записей {result.ProcessedItems}, проблемных записей {result.FailedItems}.");
    return result;
}

/// <summary>
/// Imports one catalog list item: looks up the stored record by registry number, loads the
/// detail page, saves the merged record, updates the counters and synchronizes both PDFs.
/// </summary>
private async Task ImportCatalogItemAsync(CatalogListItem item, SyncResult result, CancellationToken cancellationToken)
{
    var existingId = await _repository.FindInstrumentIdByRegistryNumberAsync(item.RegistryNumber, cancellationToken);
    var existing = existingId.HasValue
        ? await _repository.GetByIdAsync(existingId.Value, cancellationToken)
        : null;

    var details = await TryLoadDetailsAsync(item.DetailUrl, result, cancellationToken);

    var merged = Merge(existing, item, details);
    merged.Id = existing?.Id ?? 0;
    merged.SourceSystem = "KtoPoverit";
    merged.DetailUrl = item.DetailUrl ?? existing?.DetailUrl;
    merged.LastImportedAt = DateTime.UtcNow;

    var savedId = await _repository.SaveAsync(merged, cancellationToken);
    result.ProcessedItems++;
    if (existing == null)
    {
        result.AddedRecords++;
    }
    else
    {
        result.UpdatedRecords++;
    }

    // PDF links from the detail page take precedence over the ones found in the catalog list.
    await SyncAttachmentAsync(savedId, merged.RegistryNumber, "Описание типа", details?.DescriptionTypePdfUrl ?? item.DescriptionTypePdfUrl, result, cancellationToken);
    await SyncAttachmentAsync(savedId, merged.RegistryNumber, "Методика поверки", details?.MethodologyPdfUrl ?? item.MethodologyPdfUrl, result, cancellationToken);
}

/// <summary>
/// Downloads and parses a detail page; returns <c>null</c> when there is no URL or when the
/// request or parsing fails (the latter is counted in <c>SkippedDetailRequests</c>).
/// </summary>
private async Task<ParsedInstrumentDetails> TryLoadDetailsAsync(string detailUrl, SyncResult result, CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(detailUrl))
    {
        return null;
    }

    try
    {
        var detailHtml = await _client.GetStringAsync(detailUrl, cancellationToken);
        return _detailPageParser.Parse(detailHtml, _client.Options.BaseUrl);
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        throw;
    }
    catch
    {
        // Best effort: the item is still imported from the catalog list data alone.
        result.SkippedDetailRequests++;
        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Makes sure the PDF behind <paramref name="sourceUrl"/> is downloaded and recorded as a
/// non-manual attachment of the instrument.
/// </summary>
/// <remarks>
/// Nothing happens when the URL is blank, or when an attachment with that URL already has a
/// local file that exists on disk. A failed download is counted in <c>FailedPdfFiles</c>; if no
/// attachment entry existed yet, one without a local path is saved so the source URL is kept.
/// Cancellation requested through <paramref name="cancellationToken"/> is rethrown rather
/// than being counted as a failed download.
/// </remarks>
/// <param name="instrumentId">Identifier of the instrument owning the attachment.</param>
/// <param name="registryNumber">Registry number passed to PDF storage for the download.</param>
/// <param name="title">Used as both the kind and the title of the attachment.</param>
/// <param name="sourceUrl">URL of the PDF on the site; may be blank.</param>
/// <param name="result">Counters updated with the download outcome.</param>
/// <param name="cancellationToken">Token forwarded to the repository and the PDF storage.</param>
private async Task SyncAttachmentAsync(long instrumentId, string registryNumber, string title, string sourceUrl, SyncResult result, CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(sourceUrl))
    {
        return;
    }

    var existing = await _repository.FindAttachmentBySourceUrlAsync(instrumentId, sourceUrl, cancellationToken);
    if (existing != null && !string.IsNullOrWhiteSpace(existing.LocalPath) && File.Exists(existing.LocalPath))
    {
        // The file is already present locally; no need to download it again.
        return;
    }

    try
    {
        var localPath = await _pdfStorageService.DownloadAsync(sourceUrl, registryNumber, title, cancellationToken);

        // Reuse the stored entry (its local file was missing) or start a new one.
        var attachment = existing ?? new PdfAttachment
        {
            InstrumentId = instrumentId,
            IsManual = false
        };

        attachment.Kind = title;
        attachment.Title = title;
        attachment.SourceUrl = sourceUrl;
        attachment.LocalPath = localPath;

        await _repository.SaveAttachmentAsync(attachment, cancellationToken);
        result.DownloadedPdfFiles++;
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // The caller asked to stop: this is not a failed download.
        throw;
    }
    catch
    {
        result.FailedPdfFiles++;

        if (existing == null)
        {
            // Keep the source URL on record even though no local file is available.
            await _repository.SaveAttachmentAsync(new PdfAttachment
            {
                InstrumentId = instrumentId,
                Kind = title,
                Title = title,
                SourceUrl = sourceUrl,
                LocalPath = null,
                IsManual = false
            }, cancellationToken);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Builds the record to save by combining, per field, the first non-blank value from the
/// detail page, the catalog list item and the stored record (in that order of precedence).
/// </summary>
/// <param name="existing">Stored record, or <c>null</c> when the instrument is new; it is cloned, not modified.</param>
/// <param name="item">Catalog list item; may be <c>null</c>.</param>
/// <param name="details">Parsed detail page; may be <c>null</c>.</param>
/// <returns>A clone of <paramref name="existing"/> (or a new record) with the merged field values.</returns>
private static InstrumentRecord Merge(InstrumentRecord existing, CatalogListItem item, ParsedInstrumentDetails details)
{
    var merged = existing?.Clone() ?? new InstrumentRecord();

    // Fields that all three sources can provide.
    merged.RegistryNumber = Prefer(details?.RegistryNumber, item?.RegistryNumber, existing?.RegistryNumber);
    var name = Prefer(details?.Name, item?.Name, existing?.Name);
    merged.Name = name ?? "Без названия";
    merged.TypeDesignation = Prefer(details?.TypeDesignation, item?.TypeDesignation, existing?.TypeDesignation);
    merged.Manufacturer = Prefer(details?.Manufacturer, item?.Manufacturer, existing?.Manufacturer);
    merged.VerificationInterval = Prefer(details?.VerificationInterval, item?.VerificationInterval, existing?.VerificationInterval);
    merged.CertificateOrSerialNumber = Prefer(details?.CertificateOrSerialNumber, item?.CertificateOrSerialNumber, existing?.CertificateOrSerialNumber);

    // Fields taken only from the detail page, falling back to the stored value.
    merged.AllowsBatchVerification = Prefer(details?.AllowsBatchVerification, existing?.AllowsBatchVerification);
    merged.HasPeriodicVerification = Prefer(details?.HasPeriodicVerification, existing?.HasPeriodicVerification);
    merged.TypeInfo = Prefer(details?.TypeInfo, existing?.TypeInfo);
    merged.Purpose = Prefer(details?.Purpose, existing?.Purpose);
    merged.Description = Prefer(details?.Description, existing?.Description);
    merged.Software = Prefer(details?.Software, existing?.Software);
    merged.MetrologicalCharacteristics = Prefer(details?.MetrologicalCharacteristics, existing?.MetrologicalCharacteristics);
    merged.Completeness = Prefer(details?.Completeness, existing?.Completeness);
    merged.Verification = Prefer(details?.Verification, existing?.Verification);
    merged.RegulatoryDocuments = Prefer(details?.RegulatoryDocuments, existing?.RegulatoryDocuments);
    merged.Applicant = Prefer(details?.Applicant, existing?.Applicant);
    merged.TestCenter = Prefer(details?.TestCenter, existing?.TestCenter);

    return merged;
}
|
||||
|
||||
/// <summary>
/// Returns the first candidate that is not null, empty or whitespace-only, with surrounding
/// whitespace trimmed.
/// </summary>
/// <param name="values">Candidates in order of precedence; the array itself may be <c>null</c> or empty.</param>
/// <returns>The trimmed first usable value, or <c>null</c> when there is none.</returns>
private static string Prefer(params string[] values) =>
    // A lone null argument binds to the params array itself, hence the null-conditional access.
    values?.FirstOrDefault(candidate => !string.IsNullOrWhiteSpace(candidate))?.Trim();
|
||||
}
|
||||
Reference in New Issue
Block a user