Добавьте файлы проекта.
This commit is contained in:
141
Services/DatabaseInitializer.cs
Normal file
141
Services/DatabaseInitializer.cs
Normal file
@@ -0,0 +1,141 @@
|
||||
using Microsoft.Data.SqlClient;
|
||||
|
||||
namespace CRAWLER.Services;
|
||||
|
||||
internal sealed class DatabaseInitializer
|
||||
{
|
||||
private readonly IDatabaseConnectionFactory _connectionFactory;
|
||||
|
||||
/// <summary>
/// Creates the initializer on top of the factory that supplies SQL connections and options.
/// </summary>
/// <param name="connectionFactory">Source of the master and application connections.</param>
public DatabaseInitializer(IDatabaseConnectionFactory connectionFactory)
    => _connectionFactory = connectionFactory;
|
||||
|
||||
/// <summary>
/// Runs the two bootstrap steps in sequence: first the database itself, then its tables and indexes.
/// </summary>
/// <param name="cancellationToken">Token forwarded to both steps.</param>
public async Task EnsureCreatedAsync(CancellationToken cancellationToken)
{
    // Step 1: create the database if the server does not have it yet.
    await EnsureDatabaseExistsAsync(cancellationToken);

    // Step 2: create any missing tables and indexes.
    await EnsureSchemaAsync(cancellationToken);
}
|
||||
|
||||
/// <summary>
/// Opens the master connection and creates the configured database when
/// <c>DB_ID</c> reports that no database with that name exists.
/// </summary>
/// <param name="cancellationToken">Token that cancels opening the connection or running the command.</param>
/// <exception cref="InvalidOperationException">The configured database name is null or blank.</exception>
private async Task EnsureDatabaseExistsAsync(CancellationToken cancellationToken)
{
    var databaseName = _connectionFactory.Options.Database;
    if (string.IsNullOrWhiteSpace(databaseName))
    {
        throw new InvalidOperationException("Database name is not configured.");
    }

    await using var connection = _connectionFactory.CreateMasterConnection();
    await connection.OpenAsync(cancellationToken);

    // The name is used in two different syntactic positions that need different escaping:
    //  * as a value for DB_ID  -> passed as a parameter, so no escaping is needed at all;
    //  * as an identifier for CREATE DATABASE -> bracket-quoted with "]" doubled
    //    (identifiers cannot be parameterized).
    var quotedIdentifier = "[" + databaseName.Replace("]", "]]") + "]";
    var sql = $@"
IF DB_ID(@DatabaseName) IS NULL
BEGIN
    CREATE DATABASE {quotedIdentifier};
END";

    await using var command = new SqlCommand(sql, connection)
    {
        CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds
    };
    command.Parameters.AddWithValue("@DatabaseName", databaseName);
    await command.ExecuteNonQueryAsync(cancellationToken);
}
|
||||
|
||||
/// <summary>
/// Opens an application connection and runs the schema statements one by one.
/// Every statement is guarded by an existence check, so objects that are already present are skipped.
/// </summary>
/// <param name="cancellationToken">Token that cancels opening the connection or any statement.</param>
private async Task EnsureSchemaAsync(CancellationToken cancellationToken)
{
    // Order matters: each table is created before the indexes that reference it,
    // and dbo.Instruments before dbo.PdfAttachments because of the foreign key.
    string[] statements =
    {
        // Table: dbo.Instruments
        @"
IF OBJECT_ID(N'dbo.Instruments', N'U') IS NULL
BEGIN
    CREATE TABLE dbo.Instruments
    (
        Id BIGINT IDENTITY(1,1) NOT NULL CONSTRAINT PK_Instruments PRIMARY KEY,
        RegistryNumber NVARCHAR(64) NULL,
        Name NVARCHAR(512) NOT NULL,
        TypeDesignation NVARCHAR(512) NULL,
        Manufacturer NVARCHAR(2000) NULL,
        VerificationInterval NVARCHAR(512) NULL,
        CertificateOrSerialNumber NVARCHAR(512) NULL,
        AllowsBatchVerification NVARCHAR(256) NULL,
        HasPeriodicVerification NVARCHAR(256) NULL,
        TypeInfo NVARCHAR(256) NULL,
        Purpose NVARCHAR(MAX) NULL,
        Description NVARCHAR(MAX) NULL,
        Software NVARCHAR(MAX) NULL,
        MetrologicalCharacteristics NVARCHAR(MAX) NULL,
        Completeness NVARCHAR(MAX) NULL,
        Verification NVARCHAR(MAX) NULL,
        RegulatoryDocuments NVARCHAR(MAX) NULL,
        Applicant NVARCHAR(MAX) NULL,
        TestCenter NVARCHAR(MAX) NULL,
        DetailUrl NVARCHAR(1024) NULL,
        SourceSystem NVARCHAR(64) NOT NULL CONSTRAINT DF_Instruments_SourceSystem DEFAULT N'Manual',
        LastImportedAt DATETIME2 NULL,
        CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_Instruments_CreatedAt DEFAULT SYSUTCDATETIME(),
        UpdatedAt DATETIME2 NOT NULL CONSTRAINT DF_Instruments_UpdatedAt DEFAULT SYSUTCDATETIME()
    );
END",

        // Unique registry number, ignoring rows where it is NULL or empty.
        @"
IF NOT EXISTS (
    SELECT 1
    FROM sys.indexes
    WHERE name = N'UX_Instruments_RegistryNumber'
      AND object_id = OBJECT_ID(N'dbo.Instruments')
)
BEGIN
    CREATE UNIQUE INDEX UX_Instruments_RegistryNumber
        ON dbo.Instruments (RegistryNumber)
        WHERE RegistryNumber IS NOT NULL AND RegistryNumber <> N'';
END",

        // Table: dbo.PdfAttachments (rows are removed together with their instrument).
        @"
IF OBJECT_ID(N'dbo.PdfAttachments', N'U') IS NULL
BEGIN
    CREATE TABLE dbo.PdfAttachments
    (
        Id BIGINT IDENTITY(1,1) NOT NULL CONSTRAINT PK_PdfAttachments PRIMARY KEY,
        InstrumentId BIGINT NOT NULL,
        Kind NVARCHAR(128) NOT NULL,
        Title NVARCHAR(256) NULL,
        SourceUrl NVARCHAR(1024) NULL,
        LocalPath NVARCHAR(1024) NULL,
        IsManual BIT NOT NULL CONSTRAINT DF_PdfAttachments_IsManual DEFAULT (0),
        CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_PdfAttachments_CreatedAt DEFAULT SYSUTCDATETIME(),
        CONSTRAINT FK_PdfAttachments_Instruments
            FOREIGN KEY (InstrumentId) REFERENCES dbo.Instruments(Id)
            ON DELETE CASCADE
    );
END",

        // Lookup of an instrument's attachments, newest first.
        @"
IF NOT EXISTS (
    SELECT 1
    FROM sys.indexes
    WHERE name = N'IX_PdfAttachments_InstrumentId'
      AND object_id = OBJECT_ID(N'dbo.PdfAttachments')
)
BEGIN
    CREATE INDEX IX_PdfAttachments_InstrumentId
        ON dbo.PdfAttachments (InstrumentId, CreatedAt DESC);
END",

        // One attachment per (instrument, source URL), ignoring rows without a URL.
        // NOTE(review): SourceUrl is NVARCHAR(1024); the index key may exceed SQL Server's
        // maximum index key size for long URLs - confirm against the target server version.
        @"
IF NOT EXISTS (
    SELECT 1
    FROM sys.indexes
    WHERE name = N'UX_PdfAttachments_InstrumentId_SourceUrl'
      AND object_id = OBJECT_ID(N'dbo.PdfAttachments')
)
BEGIN
    CREATE UNIQUE INDEX UX_PdfAttachments_InstrumentId_SourceUrl
        ON dbo.PdfAttachments (InstrumentId, SourceUrl)
        WHERE SourceUrl IS NOT NULL AND SourceUrl <> N'';
END"
    };

    await using var connection = _connectionFactory.CreateConnection();
    await connection.OpenAsync(cancellationToken);

    for (var index = 0; index < statements.Length; index++)
    {
        await using var command = new SqlCommand(statements[index], connection);
        command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;
        await command.ExecuteNonQueryAsync(cancellationToken);
    }
}
|
||||
}
|
||||
26
Services/FilePickerService.cs
Normal file
26
Services/FilePickerService.cs
Normal file
@@ -0,0 +1,26 @@
|
||||
using Microsoft.Win32;
|
||||
|
||||
namespace CRAWLER.Services;
|
||||
|
||||
/// <summary>
/// Abstraction over the "pick PDF files" dialog.
/// </summary>
internal interface IFilePickerService
{
    /// <summary>
    /// Asks the user to choose PDF files.
    /// </summary>
    /// <param name="multiselect">Whether more than one file may be chosen.</param>
    /// <returns>The chosen file paths.</returns>
    IReadOnlyList<string> PickPdfFiles(bool multiselect);
}
|
||||
|
||||
/// <summary>
/// File picker implemented with <see cref="OpenFileDialog"/>.
/// </summary>
internal sealed class FilePickerService : IFilePickerService
{
    /// <summary>
    /// Shows an open-file dialog restricted to <c>*.pdf</c> and returns the chosen paths.
    /// </summary>
    /// <param name="multiselect">Whether the dialog allows selecting several files.</param>
    /// <returns>The selected file names, or an empty list when the dialog was not confirmed.</returns>
    public IReadOnlyList<string> PickPdfFiles(bool multiselect)
    {
        var picker = new OpenFileDialog();
        picker.Filter = "PDF (*.pdf)|*.pdf";
        picker.Multiselect = multiselect;
        picker.CheckFileExists = true;
        picker.CheckPathExists = true;

        // ShowDialog returns a nullable bool; only an explicit "true" counts as confirmation.
        var confirmed = picker.ShowDialog() == true;
        if (!confirmed)
        {
            return Array.Empty<string>();
        }

        return picker.FileNames;
    }
}
|
||||
306
Services/InstrumentCatalogService.cs
Normal file
306
Services/InstrumentCatalogService.cs
Normal file
@@ -0,0 +1,306 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using CRAWLER.Models;
|
||||
using CRAWLER.Parsing;
|
||||
|
||||
namespace CRAWLER.Services;
|
||||
|
||||
internal sealed class InstrumentCatalogService
|
||||
{
|
||||
private readonly CatalogPageParser _catalogPageParser;
|
||||
private readonly DatabaseInitializer _databaseInitializer;
|
||||
private readonly DetailPageParser _detailPageParser;
|
||||
private readonly InstrumentRepository _repository;
|
||||
private readonly KtoPoveritClient _client;
|
||||
private readonly PdfStorageService _pdfStorageService;
|
||||
|
||||
/// <summary>
/// Wires the service to its collaborators: database bootstrap, persistence,
/// the two HTML parsers, the site client and PDF storage.
/// </summary>
public InstrumentCatalogService(
    DatabaseInitializer databaseInitializer,
    InstrumentRepository repository,
    CatalogPageParser catalogPageParser,
    DetailPageParser detailPageParser,
    KtoPoveritClient client,
    PdfStorageService pdfStorageService)
{
    // Persistence.
    _databaseInitializer = databaseInitializer;
    _repository = repository;

    // Site access and parsing.
    _client = client;
    _catalogPageParser = catalogPageParser;
    _detailPageParser = detailPageParser;

    // Local PDF files.
    _pdfStorageService = pdfStorageService;
}
|
||||
|
||||
/// <summary>
/// Page count taken from the client options, never lower than 1.
/// </summary>
public int DefaultPagesToScan => Math.Max(1, _client.Options.DefaultPagesToScan);
|
||||
|
||||
/// <summary>
/// Prepares the storage by delegating to the database initializer.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the initializer.</param>
public async Task InitializeAsync(CancellationToken cancellationToken)
    => await _databaseInitializer.EnsureCreatedAsync(cancellationToken);
|
||||
|
||||
/// <summary>
/// Forwards the search to the repository.
/// </summary>
/// <param name="searchText">Text to look for.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
/// <returns>The repository's search result.</returns>
public Task<IReadOnlyList<InstrumentSummary>> SearchAsync(string searchText, CancellationToken cancellationToken)
    => _repository.SearchAsync(searchText, cancellationToken);
|
||||
|
||||
/// <summary>
/// Forwards the lookup to the repository.
/// </summary>
/// <param name="id">Instrument identifier.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
/// <returns>The repository's result for <paramref name="id"/>.</returns>
public Task<InstrumentRecord> GetByIdAsync(long id, CancellationToken cancellationToken)
    => _repository.GetByIdAsync(id, cancellationToken);
|
||||
|
||||
/// <summary>
/// Saves the instrument and then attaches every non-blank path in
/// <paramref name="pendingPdfPaths"/> to it as a manual PDF.
/// </summary>
/// <param name="record">Instrument to insert or update.</param>
/// <param name="pendingPdfPaths">Local PDF files to copy and attach; may be null.</param>
/// <param name="cancellationToken">Token forwarded to storage and repository calls.</param>
/// <returns>The identifier returned by the repository for the saved instrument.</returns>
public async Task<long> SaveInstrumentAsync(InstrumentRecord record, IEnumerable<string> pendingPdfPaths, CancellationToken cancellationToken)
{
    var instrumentId = await _repository.SaveAsync(record, cancellationToken);

    // Copying the files and registering them as "Ручной PDF" attachments is exactly what
    // AddManualAttachmentsAsync does (including the null / blank-path handling).
    await AddManualAttachmentsAsync(instrumentId, record.RegistryNumber, pendingPdfPaths, cancellationToken);

    return instrumentId;
}
|
||||
|
||||
/// <summary>
/// Deletes the instrument row and then removes the local files of its attachments.
/// </summary>
/// <param name="record">Instrument to delete; a null value is ignored.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
public async Task DeleteInstrumentAsync(InstrumentRecord record, CancellationToken cancellationToken)
{
    if (record == null)
    {
        return;
    }

    // Collect the paths up front so the file clean-up does not depend on the record afterwards.
    var localPaths = new List<string>();
    if (record.Attachments != null)
    {
        foreach (var attachment in record.Attachments)
        {
            localPaths.Add(attachment.LocalPath);
        }
    }

    // Database first: if the delete fails or is cancelled, the files are still on disk and
    // the stored attachment rows keep pointing at something that exists.
    await _repository.DeleteInstrumentAsync(record.Id, cancellationToken);

    foreach (var localPath in localPaths)
    {
        _pdfStorageService.TryDelete(localPath);
    }
}
|
||||
|
||||
/// <summary>
/// Deletes the attachment row and then removes its local file.
/// </summary>
/// <param name="attachment">Attachment to remove; a null value is ignored.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
public async Task RemoveAttachmentAsync(PdfAttachment attachment, CancellationToken cancellationToken)
{
    if (attachment == null)
    {
        return;
    }

    // Database first: if the delete fails or is cancelled, the file must still be there,
    // otherwise the remaining row would reference a file that no longer exists.
    await _repository.DeleteAttachmentAsync(attachment.Id, cancellationToken);
    _pdfStorageService.TryDelete(attachment.LocalPath);
}
|
||||
|
||||
/// <summary>
/// Copies each non-blank source file into PDF storage and registers it as a manual
/// attachment of the given instrument.
/// </summary>
/// <param name="instrumentId">Instrument that receives the attachments.</param>
/// <param name="registryNumber">Registry number passed to the storage service for the copy.</param>
/// <param name="sourcePaths">Local files to attach; null yields an empty result.</param>
/// <param name="cancellationToken">Token forwarded to storage and repository calls.</param>
/// <returns>The attachments that were saved, in input order.</returns>
public async Task<IReadOnlyList<PdfAttachment>> AddManualAttachmentsAsync(long instrumentId, string registryNumber, IEnumerable<string> sourcePaths, CancellationToken cancellationToken)
{
    if (sourcePaths == null)
    {
        return Array.Empty<PdfAttachment>();
    }

    var saved = new List<PdfAttachment>();
    foreach (var path in sourcePaths)
    {
        if (string.IsNullOrWhiteSpace(path))
        {
            continue;
        }

        // The file name without extension doubles as the storage title and the attachment title.
        var title = Path.GetFileNameWithoutExtension(path);
        var storedPath = await _pdfStorageService.CopyFromLocalAsync(path, registryNumber, title, cancellationToken);

        var manualPdf = new PdfAttachment
        {
            InstrumentId = instrumentId,
            Kind = "Ручной PDF",
            Title = title,
            SourceUrl = null,
            LocalPath = storedPath,
            IsManual = true
        };

        await _repository.SaveAttachmentAsync(manualPdf, cancellationToken);
        saved.Add(manualPdf);
    }

    return saved;
}
|
||||
|
||||
/// <summary>
/// Reads catalog pages 1..<paramref name="pagesToScan"/> from the site. Every listed instrument is
/// merged with its stored copy and its detail page, saved, and its two PDF documents are synced.
/// A failing page or item is counted and skipped; the run continues with the next one.
/// </summary>
/// <param name="pagesToScan">Number of catalog pages to read; values below 1 are treated as 1.</param>
/// <param name="progress">Optional sink for status messages.</param>
/// <param name="cancellationToken">Token that aborts the run.</param>
/// <returns>Counters describing scanned pages, saved records and failures.</returns>
/// <exception cref="OperationCanceledException">
/// Cancellation was requested through <paramref name="cancellationToken"/>.
/// </exception>
public async Task<SyncResult> SyncFromSiteAsync(int pagesToScan, IProgress<string> progress, CancellationToken cancellationToken)
{
    // True only for a cancellation the caller asked for. Other OperationCanceledException
    // instances (for example a request that ended on its own while the token is not signalled)
    // are still treated as ordinary failures.
    bool IsRequestedCancellation(Exception error) =>
        error is OperationCanceledException && cancellationToken.IsCancellationRequested;

    var result = new SyncResult();
    var totalPages = Math.Max(1, pagesToScan);

    for (var page = 1; page <= totalPages; page++)
    {
        cancellationToken.ThrowIfCancellationRequested();
        progress?.Report($"Чтение страницы {page}...");

        IReadOnlyList<CatalogListItem> items;
        try
        {
            var catalogHtml = await _client.GetStringAsync(_client.BuildCatalogPageUrl(page), cancellationToken);
            items = _catalogPageParser.Parse(catalogHtml, _client.Options.BaseUrl);
            result.PagesScanned++;
        }
        catch (Exception ex) when (!IsRequestedCancellation(ex))
        {
            result.FailedPages++;
            progress?.Report($"Страница {page} пропущена: {ex.Message}");
            continue;
        }

        foreach (var item in items)
        {
            cancellationToken.ThrowIfCancellationRequested();
            progress?.Report($"Обработка {item.RegistryNumber ?? item.Name}...");

            try
            {
                var existingId = await _repository.FindInstrumentIdByRegistryNumberAsync(item.RegistryNumber, cancellationToken);
                var existing = existingId.HasValue
                    ? await _repository.GetByIdAsync(existingId.Value, cancellationToken)
                    : null;

                // The detail page is optional: when it cannot be fetched or parsed the item
                // is still saved from the catalog row alone.
                ParsedInstrumentDetails details = null;
                if (!string.IsNullOrWhiteSpace(item.DetailUrl))
                {
                    try
                    {
                        var detailHtml = await _client.GetStringAsync(item.DetailUrl, cancellationToken);
                        details = _detailPageParser.Parse(detailHtml, _client.Options.BaseUrl);
                    }
                    catch (Exception detailError) when (!IsRequestedCancellation(detailError))
                    {
                        result.SkippedDetailRequests++;
                    }
                }

                var merged = Merge(existing, item, details);
                merged.Id = existing?.Id ?? 0;
                merged.SourceSystem = "KtoPoverit";
                merged.DetailUrl = item.DetailUrl ?? existing?.DetailUrl;
                merged.LastImportedAt = DateTime.UtcNow;

                var savedId = await _repository.SaveAsync(merged, cancellationToken);
                result.ProcessedItems++;
                if (existing == null)
                {
                    result.AddedRecords++;
                }
                else
                {
                    result.UpdatedRecords++;
                }

                await SyncAttachmentAsync(savedId, merged.RegistryNumber, "Описание типа", details?.DescriptionTypePdfUrl ?? item.DescriptionTypePdfUrl, result, cancellationToken);
                await SyncAttachmentAsync(savedId, merged.RegistryNumber, "Методика поверки", details?.MethodologyPdfUrl ?? item.MethodologyPdfUrl, result, cancellationToken);

                if (_client.Options.RequestDelayMilliseconds > 0)
                {
                    await Task.Delay(_client.Options.RequestDelayMilliseconds, cancellationToken);
                }
            }
            catch (Exception ex) when (!IsRequestedCancellation(ex))
            {
                result.FailedItems++;
                progress?.Report($"Запись {item.RegistryNumber ?? item.Name} пропущена: {ex.Message}");
            }
        }
    }

    progress?.Report($"Готово: страниц {result.PagesScanned}, записей {result.ProcessedItems}, проблемных записей {result.FailedItems}.");
    return result;
}
|
||||
|
||||
/// <summary>
/// Makes sure the PDF behind <paramref name="sourceUrl"/> is downloaded and registered for the instrument.
/// Nothing happens when the URL is blank or when a stored attachment for that URL already has
/// its local file on disk. A failed download is counted and, for a URL that has no row yet,
/// recorded as an attachment without a local path.
/// </summary>
/// <param name="instrumentId">Instrument the attachment belongs to.</param>
/// <param name="registryNumber">Registry number passed to the storage service for the download.</param>
/// <param name="title">Used as both kind and title of the attachment.</param>
/// <param name="sourceUrl">Remote PDF address; may be null or blank.</param>
/// <param name="result">Counters updated with the outcome.</param>
/// <param name="cancellationToken">Token forwarded to repository and storage calls.</param>
/// <exception cref="OperationCanceledException">
/// Cancellation was requested through <paramref name="cancellationToken"/>.
/// </exception>
private async Task SyncAttachmentAsync(long instrumentId, string registryNumber, string title, string sourceUrl, SyncResult result, CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(sourceUrl))
    {
        return;
    }

    var existing = await _repository.FindAttachmentBySourceUrlAsync(instrumentId, sourceUrl, cancellationToken);
    if (existing != null && !string.IsNullOrWhiteSpace(existing.LocalPath) && File.Exists(existing.LocalPath))
    {
        return;
    }

    try
    {
        var localPath = await _pdfStorageService.DownloadAsync(sourceUrl, registryNumber, title, cancellationToken);

        // Reuse the stored row (its file is missing) or start a new automatic attachment.
        var attachment = existing ?? new PdfAttachment
        {
            InstrumentId = instrumentId,
            IsManual = false
        };

        attachment.Kind = title;
        attachment.Title = title;
        attachment.SourceUrl = sourceUrl;
        attachment.LocalPath = localPath;

        await _repository.SaveAttachmentAsync(attachment, cancellationToken);
        result.DownloadedPdfFiles++;
    }
    catch (Exception ex) when (!(ex is OperationCanceledException && cancellationToken.IsCancellationRequested))
    {
        // A cancellation requested by the caller is deliberately not handled here: it is not
        // a failed PDF, and the placeholder save below would run with an already-cancelled token.
        result.FailedPdfFiles++;

        if (existing == null)
        {
            // Remember the URL without a file so the attachment is still listed.
            await _repository.SaveAttachmentAsync(new PdfAttachment
            {
                InstrumentId = instrumentId,
                Kind = title,
                Title = title,
                SourceUrl = sourceUrl,
                LocalPath = null,
                IsManual = false
            }, cancellationToken);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Builds the record to save from up to three sources. For every field the first non-blank
/// value wins, in the order: detail page, catalog row, stored record.
/// </summary>
/// <param name="existing">Stored record, or null for a new instrument; it is cloned, not modified.</param>
/// <param name="item">Catalog row; may be null.</param>
/// <param name="details">Parsed detail page; may be null.</param>
/// <returns>A new or cloned record with the merged field values.</returns>
private static InstrumentRecord Merge(InstrumentRecord existing, CatalogListItem item, ParsedInstrumentDetails details)
{
    var merged = existing?.Clone() ?? new InstrumentRecord();

    // Fields available from all three sources.
    merged.RegistryNumber = Prefer(details?.RegistryNumber, item?.RegistryNumber, existing?.RegistryNumber);
    merged.Name = Prefer(details?.Name, item?.Name, existing?.Name) ?? "Без названия";
    merged.TypeDesignation = Prefer(details?.TypeDesignation, item?.TypeDesignation, existing?.TypeDesignation);
    merged.Manufacturer = Prefer(details?.Manufacturer, item?.Manufacturer, existing?.Manufacturer);
    merged.VerificationInterval = Prefer(details?.VerificationInterval, item?.VerificationInterval, existing?.VerificationInterval);
    merged.CertificateOrSerialNumber = Prefer(details?.CertificateOrSerialNumber, item?.CertificateOrSerialNumber, existing?.CertificateOrSerialNumber);

    // Fields taken only from the detail page, falling back to the stored value.
    merged.AllowsBatchVerification = Prefer(details?.AllowsBatchVerification, existing?.AllowsBatchVerification);
    merged.HasPeriodicVerification = Prefer(details?.HasPeriodicVerification, existing?.HasPeriodicVerification);
    merged.TypeInfo = Prefer(details?.TypeInfo, existing?.TypeInfo);
    merged.Purpose = Prefer(details?.Purpose, existing?.Purpose);
    merged.Description = Prefer(details?.Description, existing?.Description);
    merged.Software = Prefer(details?.Software, existing?.Software);
    merged.MetrologicalCharacteristics = Prefer(details?.MetrologicalCharacteristics, existing?.MetrologicalCharacteristics);
    merged.Completeness = Prefer(details?.Completeness, existing?.Completeness);
    merged.Verification = Prefer(details?.Verification, existing?.Verification);
    merged.RegulatoryDocuments = Prefer(details?.RegulatoryDocuments, existing?.RegulatoryDocuments);
    merged.Applicant = Prefer(details?.Applicant, existing?.Applicant);
    merged.TestCenter = Prefer(details?.TestCenter, existing?.TestCenter);

    return merged;
}
|
||||
|
||||
/// <summary>
/// Returns the first candidate that is not null, empty or whitespace, trimmed.
/// </summary>
/// <param name="values">Candidates in priority order.</param>
/// <returns>The trimmed winner, or null when every candidate is blank.</returns>
private static string Prefer(params string[] values)
{
    for (var index = 0; index < values.Length; index++)
    {
        var candidate = values[index];
        if (string.IsNullOrWhiteSpace(candidate))
        {
            continue;
        }

        return candidate.Trim();
    }

    return null;
}
|
||||
}
|
||||
526
Services/InstrumentRepository.cs
Normal file
526
Services/InstrumentRepository.cs
Normal file
@@ -0,0 +1,526 @@
|
||||
using CRAWLER.Models;
|
||||
using Microsoft.Data.SqlClient;
|
||||
|
||||
namespace CRAWLER.Services;
|
||||
|
||||
internal sealed class InstrumentRepository
|
||||
{
|
||||
private readonly IDatabaseConnectionFactory _connectionFactory;
|
||||
|
||||
/// <summary>
/// Creates the repository on top of the factory that supplies SQL connections and options.
/// </summary>
/// <param name="connectionFactory">Source of database connections and command options.</param>
public InstrumentRepository(IDatabaseConnectionFactory connectionFactory)
    => _connectionFactory = connectionFactory;
|
||||
|
||||
/// <summary>
/// Returns up to 500 instrument summaries. Without a filter all rows qualify; with a filter
/// a row qualifies when the text occurs in its registry number, name, type designation or
/// manufacturer. Rows with a registry number come first (descending), then by last update.
/// </summary>
/// <param name="searchText">Text to look for, matched literally; null or blank disables the filter.</param>
/// <param name="cancellationToken">Token that cancels the query.</param>
/// <returns>The matching summaries in query order.</returns>
public async Task<IReadOnlyList<InstrumentSummary>> SearchAsync(string searchText, CancellationToken cancellationToken)
{
    // LIKE treats %, _ and [ as wildcards. Wrapping each in a bracket class makes the user's
    // text match literally; "[" has to be handled first because the other two replacements
    // introduce brackets of their own.
    static string EscapeLikePattern(string text) =>
        text.Replace("[", "[[]").Replace("%", "[%]").Replace("_", "[_]");

    var items = new List<InstrumentSummary>();
    var hasFilter = !string.IsNullOrWhiteSpace(searchText);
    var trimmed = hasFilter ? searchText.Trim() : null;

    const string sql = @"
SELECT TOP (500)
    Id,
    RegistryNumber,
    Name,
    TypeDesignation,
    Manufacturer,
    VerificationInterval,
    SourceSystem,
    UpdatedAt
FROM dbo.Instruments
WHERE @Search IS NULL
    OR RegistryNumber LIKE @Like
    OR Name LIKE @Like
    OR TypeDesignation LIKE @Like
    OR Manufacturer LIKE @Like
ORDER BY
    CASE WHEN RegistryNumber IS NULL OR RegistryNumber = N'' THEN 1 ELSE 0 END,
    RegistryNumber DESC,
    UpdatedAt DESC;";

    await using var connection = _connectionFactory.CreateConnection();
    await connection.OpenAsync(cancellationToken);

    await using var command = new SqlCommand(sql, connection)
    {
        CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds
    };

    // @Search only switches the filter on or off; @Like carries the actual pattern.
    command.Parameters.AddWithValue("@Search", hasFilter ? trimmed : (object)DBNull.Value);
    command.Parameters.AddWithValue("@Like", hasFilter ? $"%{EscapeLikePattern(trimmed)}%" : (object)DBNull.Value);

    await using var reader = await command.ExecuteReaderAsync(cancellationToken);
    while (await reader.ReadAsync(cancellationToken))
    {
        items.Add(new InstrumentSummary
        {
            Id = reader.GetInt64(0),
            RegistryNumber = GetString(reader, 1),
            Name = GetString(reader, 2),
            TypeDesignation = GetString(reader, 3),
            Manufacturer = GetString(reader, 4),
            VerificationInterval = GetString(reader, 5),
            SourceSystem = GetString(reader, 6),
            UpdatedAt = reader.GetDateTime(7)
        });
    }

    return items;
}
|
||||
|
||||
/// <summary>
/// Loads one instrument with all of its columns and its attachments.
/// </summary>
/// <param name="id">Instrument identifier.</param>
/// <param name="cancellationToken">Token that cancels the queries.</param>
/// <returns>The instrument, or null when no row has that identifier.</returns>
public async Task<InstrumentRecord> GetByIdAsync(long id, CancellationToken cancellationToken)
{
    const string sql = @"
SELECT
    Id,
    RegistryNumber,
    Name,
    TypeDesignation,
    Manufacturer,
    VerificationInterval,
    CertificateOrSerialNumber,
    AllowsBatchVerification,
    HasPeriodicVerification,
    TypeInfo,
    Purpose,
    Description,
    Software,
    MetrologicalCharacteristics,
    Completeness,
    Verification,
    RegulatoryDocuments,
    Applicant,
    TestCenter,
    DetailUrl,
    SourceSystem,
    LastImportedAt,
    CreatedAt,
    UpdatedAt
FROM dbo.Instruments
WHERE Id = @Id;";

    // Column ordinals follow the SELECT list above.
    InstrumentRecord MapRow(SqlDataReader row)
    {
        var mapped = new InstrumentRecord();
        mapped.Id = row.GetInt64(0);
        mapped.RegistryNumber = GetString(row, 1);
        mapped.Name = GetString(row, 2);
        mapped.TypeDesignation = GetString(row, 3);
        mapped.Manufacturer = GetString(row, 4);
        mapped.VerificationInterval = GetString(row, 5);
        mapped.CertificateOrSerialNumber = GetString(row, 6);
        mapped.AllowsBatchVerification = GetString(row, 7);
        mapped.HasPeriodicVerification = GetString(row, 8);
        mapped.TypeInfo = GetString(row, 9);
        mapped.Purpose = GetString(row, 10);
        mapped.Description = GetString(row, 11);
        mapped.Software = GetString(row, 12);
        mapped.MetrologicalCharacteristics = GetString(row, 13);
        mapped.Completeness = GetString(row, 14);
        mapped.Verification = GetString(row, 15);
        mapped.RegulatoryDocuments = GetString(row, 16);
        mapped.Applicant = GetString(row, 17);
        mapped.TestCenter = GetString(row, 18);
        mapped.DetailUrl = GetString(row, 19);
        mapped.SourceSystem = GetString(row, 20);
        mapped.LastImportedAt = row.IsDBNull(21) ? (DateTime?)null : row.GetDateTime(21);
        mapped.CreatedAt = row.GetDateTime(22);
        mapped.UpdatedAt = row.GetDateTime(23);
        return mapped;
    }

    await using var connection = _connectionFactory.CreateConnection();
    await connection.OpenAsync(cancellationToken);

    await using var command = new SqlCommand(sql, connection);
    command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;
    command.Parameters.AddWithValue("@Id", id);

    InstrumentRecord record;

    // The reader is scoped to this block so it is closed before the attachment
    // query runs on the same connection.
    await using (var reader = await command.ExecuteReaderAsync(cancellationToken))
    {
        var found = await reader.ReadAsync(cancellationToken);
        record = found ? MapRow(reader) : null;
    }

    if (record != null)
    {
        var attachments = await GetAttachmentsAsync(connection, id, cancellationToken);
        record.Attachments = attachments.ToList();
    }

    return record;
}
|
||||
|
||||
/// <summary>
/// Looks up the identifier of the instrument with the given registry number.
/// </summary>
/// <param name="registryNumber">Registry number; it is trimmed before the comparison.</param>
/// <param name="cancellationToken">Token that cancels the query.</param>
/// <returns>The identifier, or null when the number is blank or no row matches.</returns>
public async Task<long?> FindInstrumentIdByRegistryNumberAsync(string registryNumber, CancellationToken cancellationToken)
{
    // A blank number can never identify a row, so the database is not queried at all.
    if (string.IsNullOrWhiteSpace(registryNumber))
    {
        return null;
    }

    await using var connection = _connectionFactory.CreateConnection();
    await connection.OpenAsync(cancellationToken);

    await using var command = new SqlCommand("SELECT Id FROM dbo.Instruments WHERE RegistryNumber = @RegistryNumber;", connection);
    command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;
    command.Parameters.AddWithValue("@RegistryNumber", registryNumber.Trim());

    var scalar = await command.ExecuteScalarAsync(cancellationToken);
    return scalar is null or DBNull
        ? (long?)null
        : Convert.ToInt64(scalar);
}
|
||||
|
||||
/// <summary>
/// Inserts the record when its <c>Id</c> is zero or negative, otherwise updates the row with that <c>Id</c>.
/// </summary>
/// <param name="record">Instrument to persist.</param>
/// <param name="cancellationToken">Token that cancels the command.</param>
/// <returns>The identity generated by the insert, or <c>record.Id</c> after an update.</returns>
/// <exception cref="ArgumentNullException"><paramref name="record"/> is null.</exception>
public async Task<long> SaveAsync(InstrumentRecord record, CancellationToken cancellationToken)
{
    if (record == null)
    {
        throw new ArgumentNullException(nameof(record));
    }

    const string updateSql = @"
UPDATE dbo.Instruments
SET
    RegistryNumber = @RegistryNumber,
    Name = @Name,
    TypeDesignation = @TypeDesignation,
    Manufacturer = @Manufacturer,
    VerificationInterval = @VerificationInterval,
    CertificateOrSerialNumber = @CertificateOrSerialNumber,
    AllowsBatchVerification = @AllowsBatchVerification,
    HasPeriodicVerification = @HasPeriodicVerification,
    TypeInfo = @TypeInfo,
    Purpose = @Purpose,
    Description = @Description,
    Software = @Software,
    MetrologicalCharacteristics = @MetrologicalCharacteristics,
    Completeness = @Completeness,
    Verification = @Verification,
    RegulatoryDocuments = @RegulatoryDocuments,
    Applicant = @Applicant,
    TestCenter = @TestCenter,
    DetailUrl = @DetailUrl,
    SourceSystem = @SourceSystem,
    LastImportedAt = @LastImportedAt,
    UpdatedAt = SYSUTCDATETIME()
WHERE Id = @Id;";

    const string insertSql = @"
INSERT INTO dbo.Instruments
(
    RegistryNumber,
    Name,
    TypeDesignation,
    Manufacturer,
    VerificationInterval,
    CertificateOrSerialNumber,
    AllowsBatchVerification,
    HasPeriodicVerification,
    TypeInfo,
    Purpose,
    Description,
    Software,
    MetrologicalCharacteristics,
    Completeness,
    Verification,
    RegulatoryDocuments,
    Applicant,
    TestCenter,
    DetailUrl,
    SourceSystem,
    LastImportedAt,
    CreatedAt,
    UpdatedAt
)
OUTPUT INSERTED.Id
VALUES
(
    @RegistryNumber,
    @Name,
    @TypeDesignation,
    @Manufacturer,
    @VerificationInterval,
    @CertificateOrSerialNumber,
    @AllowsBatchVerification,
    @HasPeriodicVerification,
    @TypeInfo,
    @Purpose,
    @Description,
    @Software,
    @MetrologicalCharacteristics,
    @Completeness,
    @Verification,
    @RegulatoryDocuments,
    @Applicant,
    @TestCenter,
    @DetailUrl,
    @SourceSystem,
    @LastImportedAt,
    SYSUTCDATETIME(),
    SYSUTCDATETIME()
);";

    await using var connection = _connectionFactory.CreateConnection();
    await connection.OpenAsync(cancellationToken);

    var isExistingRow = record.Id > 0;
    if (isExistingRow)
    {
        await using var updateCommand = CreateRecordCommand(updateSql, connection, record);
        updateCommand.Parameters.AddWithValue("@Id", record.Id);
        await updateCommand.ExecuteNonQueryAsync(cancellationToken);
        return record.Id;
    }

    // New row: the statement's OUTPUT clause hands back the generated identity.
    await using var insertCommand = CreateRecordCommand(insertSql, connection, record);
    var generatedId = await insertCommand.ExecuteScalarAsync(cancellationToken);
    return Convert.ToInt64(generatedId);
}
|
||||
|
||||
/// <summary>
/// Finds the attachment of an instrument that was registered for the given source URL.
/// </summary>
/// <param name="instrumentId">Owning instrument.</param>
/// <param name="sourceUrl">URL to match exactly as passed.</param>
/// <param name="cancellationToken">Token that cancels the query.</param>
/// <returns>The first matching attachment, or null when the URL is blank or nothing matches.</returns>
public async Task<PdfAttachment> FindAttachmentBySourceUrlAsync(long instrumentId, string sourceUrl, CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(sourceUrl))
    {
        return null;
    }

    const string sql = @"
SELECT
    Id,
    InstrumentId,
    Kind,
    Title,
    SourceUrl,
    LocalPath,
    IsManual,
    CreatedAt
FROM dbo.PdfAttachments
WHERE InstrumentId = @InstrumentId
    AND SourceUrl = @SourceUrl;";

    await using var connection = _connectionFactory.CreateConnection();
    await connection.OpenAsync(cancellationToken);

    await using var command = new SqlCommand(sql, connection);
    command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;
    command.Parameters.AddWithValue("@InstrumentId", instrumentId);
    command.Parameters.AddWithValue("@SourceUrl", sourceUrl);

    await using var reader = await command.ExecuteReaderAsync(cancellationToken);

    var hasRow = await reader.ReadAsync(cancellationToken);
    if (hasRow)
    {
        // Column ordinals follow the SELECT list above.
        var found = new PdfAttachment();
        found.Id = reader.GetInt64(0);
        found.InstrumentId = reader.GetInt64(1);
        found.Kind = GetString(reader, 2);
        found.Title = GetString(reader, 3);
        found.SourceUrl = GetString(reader, 4);
        found.LocalPath = GetString(reader, 5);
        found.IsManual = reader.GetBoolean(6);
        found.CreatedAt = reader.GetDateTime(7);
        return found;
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Inserts the attachment when its <c>Id</c> is zero or negative, otherwise updates the row with that <c>Id</c>.
/// After an insert the generated identity is written back to <c>attachment.Id</c>, so the same
/// object can later be updated or deleted by identifier.
/// </summary>
/// <param name="attachment">Attachment to persist; its <c>Id</c> is set on insert.</param>
/// <param name="cancellationToken">Token that cancels the command.</param>
/// <exception cref="ArgumentNullException"><paramref name="attachment"/> is null.</exception>
public async Task SaveAttachmentAsync(PdfAttachment attachment, CancellationToken cancellationToken)
{
    if (attachment == null)
    {
        throw new ArgumentNullException(nameof(attachment));
    }

    await using var connection = _connectionFactory.CreateConnection();
    await connection.OpenAsync(cancellationToken);

    if (attachment.Id <= 0)
    {
        // OUTPUT INSERTED.Id mirrors the instrument insert: the new identity comes back
        // as the scalar result of the statement.
        const string insertSql = @"
INSERT INTO dbo.PdfAttachments
(
    InstrumentId,
    Kind,
    Title,
    SourceUrl,
    LocalPath,
    IsManual,
    CreatedAt
)
OUTPUT INSERTED.Id
VALUES
(
    @InstrumentId,
    @Kind,
    @Title,
    @SourceUrl,
    @LocalPath,
    @IsManual,
    SYSUTCDATETIME()
);";

        await using var command = CreateAttachmentCommand(insertSql, connection, attachment);
        var generatedId = await command.ExecuteScalarAsync(cancellationToken);
        attachment.Id = Convert.ToInt64(generatedId);
        return;
    }

    const string updateSql = @"
UPDATE dbo.PdfAttachments
SET
    Kind = @Kind,
    Title = @Title,
    SourceUrl = @SourceUrl,
    LocalPath = @LocalPath,
    IsManual = @IsManual
WHERE Id = @Id;";

    await using (var command = CreateAttachmentCommand(updateSql, connection, attachment))
    {
        command.Parameters.AddWithValue("@Id", attachment.Id);
        await command.ExecuteNonQueryAsync(cancellationToken);
    }
}
|
||||
|
||||
/// <summary>
/// Deletes the attachment row with the given identifier.
/// </summary>
/// <param name="attachmentId">Identifier of the row in <c>dbo.PdfAttachments</c>.</param>
/// <param name="cancellationToken">Token that cancels the command.</param>
public async Task DeleteAttachmentAsync(long attachmentId, CancellationToken cancellationToken)
{
    await using (var connection = _connectionFactory.CreateConnection())
    {
        await connection.OpenAsync(cancellationToken);

        await using (var command = new SqlCommand("DELETE FROM dbo.PdfAttachments WHERE Id = @Id;", connection))
        {
            command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;
            command.Parameters.AddWithValue("@Id", attachmentId);
            await command.ExecuteNonQueryAsync(cancellationToken);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Deletes the instrument row with the given identifier.
/// </summary>
/// <param name="id">Identifier of the row in <c>dbo.Instruments</c>.</param>
/// <param name="cancellationToken">Token that cancels the command.</param>
public async Task DeleteInstrumentAsync(long id, CancellationToken cancellationToken)
{
    await using (var connection = _connectionFactory.CreateConnection())
    {
        await connection.OpenAsync(cancellationToken);

        await using (var command = new SqlCommand("DELETE FROM dbo.Instruments WHERE Id = @Id;", connection))
        {
            command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;
            command.Parameters.AddWithValue("@Id", id);
            await command.ExecuteNonQueryAsync(cancellationToken);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Reads all attachments of an instrument over an already open connection, newest first.
/// </summary>
/// <param name="connection">Open connection to run the query on; it is not disposed here.</param>
/// <param name="instrumentId">Owning instrument.</param>
/// <param name="cancellationToken">Token that cancels the query.</param>
/// <returns>The attachments ordered by creation time and identifier, both descending.</returns>
private async Task<IReadOnlyList<PdfAttachment>> GetAttachmentsAsync(SqlConnection connection, long instrumentId, CancellationToken cancellationToken)
{
    const string sql = @"
SELECT
    Id,
    InstrumentId,
    Kind,
    Title,
    SourceUrl,
    LocalPath,
    IsManual,
    CreatedAt
FROM dbo.PdfAttachments
WHERE InstrumentId = @InstrumentId
ORDER BY CreatedAt DESC, Id DESC;";

    await using var command = new SqlCommand(sql, connection);
    command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;
    command.Parameters.AddWithValue("@InstrumentId", instrumentId);

    var attachments = new List<PdfAttachment>();

    await using var reader = await command.ExecuteReaderAsync(cancellationToken);
    while (await reader.ReadAsync(cancellationToken))
    {
        // Column ordinals follow the SELECT list above.
        var attachment = new PdfAttachment();
        attachment.Id = reader.GetInt64(0);
        attachment.InstrumentId = reader.GetInt64(1);
        attachment.Kind = GetString(reader, 2);
        attachment.Title = GetString(reader, 3);
        attachment.SourceUrl = GetString(reader, 4);
        attachment.LocalPath = GetString(reader, 5);
        attachment.IsManual = reader.GetBoolean(6);
        attachment.CreatedAt = reader.GetDateTime(7);
        attachments.Add(attachment);
    }

    return attachments;
}
|
||||
|
||||
/// <summary>
/// Builds a command for <paramref name="sql"/> and binds every instrument column parameter
/// from <paramref name="record"/>.
/// </summary>
/// <param name="sql">Statement text that references the parameters bound here.</param>
/// <param name="connection">Connection the command is attached to.</param>
/// <param name="record">Source of the parameter values.</param>
/// <returns>The configured command; the caller is responsible for disposing it.</returns>
private SqlCommand CreateRecordCommand(string sql, SqlConnection connection, InstrumentRecord record)
{
    var command = new SqlCommand(sql, connection);
    command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;

    // Name and SourceSystem get a fixed placeholder when blank; every other text value goes
    // through ToDbValue, and a missing import timestamp is sent as DBNull.
    object name = string.IsNullOrWhiteSpace(record.Name) ? "Без названия" : record.Name.Trim();
    object sourceSystem = string.IsNullOrWhiteSpace(record.SourceSystem) ? "Manual" : record.SourceSystem.Trim();
    object lastImportedAt = record.LastImportedAt.HasValue ? (object)record.LastImportedAt.Value : DBNull.Value;

    var bindings = new (string Name, object Value)[]
    {
        ("@RegistryNumber", ToDbValue(record.RegistryNumber)),
        ("@Name", name),
        ("@TypeDesignation", ToDbValue(record.TypeDesignation)),
        ("@Manufacturer", ToDbValue(record.Manufacturer)),
        ("@VerificationInterval", ToDbValue(record.VerificationInterval)),
        ("@CertificateOrSerialNumber", ToDbValue(record.CertificateOrSerialNumber)),
        ("@AllowsBatchVerification", ToDbValue(record.AllowsBatchVerification)),
        ("@HasPeriodicVerification", ToDbValue(record.HasPeriodicVerification)),
        ("@TypeInfo", ToDbValue(record.TypeInfo)),
        ("@Purpose", ToDbValue(record.Purpose)),
        ("@Description", ToDbValue(record.Description)),
        ("@Software", ToDbValue(record.Software)),
        ("@MetrologicalCharacteristics", ToDbValue(record.MetrologicalCharacteristics)),
        ("@Completeness", ToDbValue(record.Completeness)),
        ("@Verification", ToDbValue(record.Verification)),
        ("@RegulatoryDocuments", ToDbValue(record.RegulatoryDocuments)),
        ("@Applicant", ToDbValue(record.Applicant)),
        ("@TestCenter", ToDbValue(record.TestCenter)),
        ("@DetailUrl", ToDbValue(record.DetailUrl)),
        ("@SourceSystem", sourceSystem),
        ("@LastImportedAt", lastImportedAt)
    };

    foreach (var (parameterName, parameterValue) in bindings)
    {
        command.Parameters.AddWithValue(parameterName, parameterValue);
    }

    return command;
}
|
||||
|
||||
/// <summary>
/// Builds a command for <paramref name="sql"/> and binds the attachment column parameters
/// (<c>@InstrumentId</c>, <c>@Kind</c>, <c>@Title</c>, <c>@SourceUrl</c>, <c>@LocalPath</c>, <c>@IsManual</c>).
/// </summary>
/// <param name="sql">Statement text that references the parameters bound here.</param>
/// <param name="connection">Connection the command is attached to.</param>
/// <param name="attachment">Source of the parameter values.</param>
/// <returns>The configured command; the caller is responsible for disposing it.</returns>
private SqlCommand CreateAttachmentCommand(string sql, SqlConnection connection, PdfAttachment attachment)
{
    var command = new SqlCommand(sql, connection);
    command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;

    // A blank kind is stored as "PDF"; the optional text fields become NULL when blank.
    var kind = string.IsNullOrWhiteSpace(attachment.Kind) ? "PDF" : attachment.Kind.Trim();

    var parameters = command.Parameters;
    parameters.AddWithValue("@InstrumentId", attachment.InstrumentId);
    parameters.AddWithValue("@Kind", kind);
    parameters.AddWithValue("@Title", ToDbValue(attachment.Title));
    parameters.AddWithValue("@SourceUrl", ToDbValue(attachment.SourceUrl));
    parameters.AddWithValue("@LocalPath", ToDbValue(attachment.LocalPath));
    parameters.AddWithValue("@IsManual", attachment.IsManual);

    return command;
}
|
||||
|
||||
/// <summary>
/// Converts an optional text value into a SQL parameter value.
/// </summary>
/// <param name="value">Text to convert; may be <c>null</c>.</param>
/// <returns>
/// <see cref="DBNull.Value"/> when <paramref name="value"/> is null, empty or whitespace;
/// otherwise the trimmed string.
/// </returns>
private static object ToDbValue(string value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return DBNull.Value;
    }

    return value.Trim();
}
|
||||
|
||||
/// <summary>
/// Reads a string column, mapping a database NULL to a <c>null</c> reference.
/// </summary>
/// <param name="reader">Reader positioned on the current row.</param>
/// <param name="index">Zero-based column ordinal.</param>
/// <returns>The column text, or <c>null</c> when the column holds NULL.</returns>
private static string GetString(SqlDataReader reader, int index)
{
    if (reader.IsDBNull(index))
    {
        return null;
    }

    return reader.GetString(index);
}
|
||||
}
|
||||
187
Services/KtoPoveritClient.cs
Normal file
187
Services/KtoPoveritClient.cs
Normal file
@@ -0,0 +1,187 @@
|
||||
using System.Net;
|
||||
using System.Net.Http;
|
||||
using System.Net.Http.Headers;
|
||||
using CRAWLER.Configuration;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
|
||||
namespace CRAWLER.Services;
|
||||
|
||||
/// <summary>
/// HTTP client for the crawled site, configured from the <c>Crawler</c> configuration section.
/// Automatic redirects are disabled on the handler; <see cref="SendAsync"/> follows them manually
/// so that a <c>Location</c> header the framework could not parse can still be resolved.
/// </summary>
internal sealed class KtoPoveritClient : IDisposable
{
    /// <summary>Maximum number of redirects followed for a single logical request.</summary>
    private const int MaxRedirects = 10;

    private readonly CrawlerOptions _options;
    private readonly HttpClient _httpClient;

    /// <summary>
    /// Reads the <c>Crawler</c> section and creates the underlying <see cref="HttpClient"/>.
    /// </summary>
    /// <param name="configuration">Application configuration that contains the <c>Crawler</c> section.</param>
    /// <exception cref="InvalidOperationException">The <c>Crawler</c> section is missing.</exception>
    public KtoPoveritClient(IConfiguration configuration)
    {
        _options = configuration.GetSection("Crawler").Get<CrawlerOptions>()
            ?? throw new InvalidOperationException("Раздел Crawler не найден в appsettings.json.");

        var handler = new SocketsHttpHandler
        {
            AutomaticDecompression = DecompressionMethods.All,
            AllowAutoRedirect = false
        };

        _httpClient = new HttpClient(handler)
        {
            // The configured timeout is never allowed to drop below five seconds.
            Timeout = TimeSpan.FromSeconds(Math.Max(5, _options.TimeoutSeconds))
        };
        _httpClient.DefaultRequestHeaders.UserAgent.ParseAdd(_options.UserAgent);
        _httpClient.DefaultRequestHeaders.AcceptLanguage.ParseAdd("ru-RU,ru;q=0.9,en-US;q=0.8");
    }

    /// <summary>Crawler options this client was configured with.</summary>
    public CrawlerOptions Options => _options;

    /// <summary>
    /// Downloads <paramref name="url"/> (following redirects) and returns the body as text.
    /// </summary>
    /// <exception cref="InvalidOperationException">The request failed; the original error is the inner exception.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
    public async Task<string> GetStringAsync(string url, CancellationToken cancellationToken)
    {
        using var request = CreateRequest(url);
        using var response = await SendAsync(request, cancellationToken);
        return await response.Content.ReadAsStringAsync(cancellationToken);
    }

    /// <summary>
    /// Downloads <paramref name="url"/> (following redirects) and returns the raw body bytes.
    /// </summary>
    /// <exception cref="InvalidOperationException">The request failed; the original error is the inner exception.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
    public async Task<byte[]> GetBytesAsync(string url, CancellationToken cancellationToken)
    {
        using var request = CreateRequest(url);
        using var response = await SendAsync(request, cancellationToken);
        return await response.Content.ReadAsByteArrayAsync(cancellationToken);
    }

    /// <summary>
    /// Builds the absolute URL of a catalog page by substituting <paramref name="page"/> into
    /// <c>CatalogPathFormat</c>.
    /// </summary>
    public string BuildCatalogPageUrl(int page)
    {
        // URLs are machine-readable, so the number is formatted culture-independently.
        var relative = string.Format(System.Globalization.CultureInfo.InvariantCulture, _options.CatalogPathFormat, page);
        return BuildAbsoluteUrl(relative);
    }

    /// <summary>
    /// Turns a URL or site-relative path into an absolute URL based on <c>BaseUrl</c>.
    /// </summary>
    /// <param name="urlOrPath">Absolute URL or a path relative to the site root.</param>
    /// <returns>
    /// The absolute URL; <c>null</c> when <paramref name="urlOrPath"/> is null, empty or whitespace.
    /// </returns>
    public string BuildAbsoluteUrl(string urlOrPath)
    {
        if (string.IsNullOrWhiteSpace(urlOrPath))
        {
            return null;
        }

        if (Uri.TryCreate(urlOrPath, UriKind.Absolute, out var absoluteUri))
        {
            return absoluteUri.ToString();
        }

        // Normalise to exactly one slash between base and path so the base's last segment is kept.
        var baseUri = new Uri(_options.BaseUrl.TrimEnd('/') + "/");
        return new Uri(baseUri, urlOrPath.TrimStart('/')).ToString();
    }

    /// <summary>Creates a GET request pinned to HTTP/1.1 (or lower).</summary>
    private HttpRequestMessage CreateRequest(string url)
    {
        return new HttpRequestMessage(HttpMethod.Get, url)
        {
            Version = HttpVersion.Version11,
            VersionPolicy = HttpVersionPolicy.RequestVersionOrLower
        };
    }

    /// <summary>
    /// Sends a GET for the URI of <paramref name="request"/>, following up to
    /// <see cref="MaxRedirects"/> redirects, and returns the first 2xx response.
    /// </summary>
    /// <param name="request">Only its <c>RequestUri</c> is used; a fresh request is created per hop.</param>
    /// <param name="cancellationToken">Token that cancels the whole redirect chain.</param>
    /// <returns>A successful response; the caller owns and must dispose it.</returns>
    /// <exception cref="InvalidOperationException">
    /// Any failure (non-success status, bad redirect, too many redirects, network error, timeout).
    /// </exception>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="cancellationToken"/> was cancelled; propagated unwrapped so callers can
    /// tell cancellation apart from a failure.
    /// </exception>
    private async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        var currentUri = request.RequestUri ?? throw new InvalidOperationException("Не задан URL запроса.");

        try
        {
            for (var redirectIndex = 0; redirectIndex <= MaxRedirects; redirectIndex++)
            {
                using var currentRequest = CreateRequest(currentUri.ToString());
                var response = await _httpClient.SendAsync(currentRequest, HttpCompletionOption.ResponseContentRead, cancellationToken);

                // Ownership passes to the caller only on success; every other path disposes here.
                var handedToCaller = false;
                try
                {
                    var statusCode = (int)response.StatusCode;

                    if (IsRedirectStatusCode(response.StatusCode))
                    {
                        currentUri = ResolveRedirectUri(currentUri, response.Headers)
                            ?? throw new InvalidOperationException(
                                $"Сайт вернул {statusCode} для {currentUri}, но не прислал корректный адрес перенаправления.");
                        continue;
                    }

                    if (statusCode is >= 200 and <= 299)
                    {
                        handedToCaller = true;
                        return response;
                    }

                    throw new HttpRequestException(
                        $"Response status code does not indicate success: {statusCode} ({response.ReasonPhrase}).");
                }
                finally
                {
                    if (!handedToCaller)
                    {
                        response.Dispose();
                    }
                }
            }

            throw new InvalidOperationException(
                $"Превышено число перенаправлений ({MaxRedirects}) для {currentUri}.");
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Caller-requested cancellation is not a site failure. An HttpClient timeout also
            // surfaces as OperationCanceledException but leaves the token unset, so it is
            // still wrapped below.
            throw;
        }
        catch (Exception ex)
        {
            throw new InvalidOperationException(
                $"Не удалось получить данные с сайта Кто поверит: {request.RequestUri}. {ex.Message}",
                ex);
        }
    }

    /// <summary>Returns <c>true</c> for 301, 302, 303, 307 and 308.</summary>
    private static bool IsRedirectStatusCode(HttpStatusCode statusCode)
    {
        return statusCode == HttpStatusCode.Moved
            || statusCode == HttpStatusCode.Redirect
            || statusCode == HttpStatusCode.RedirectMethod
            || statusCode == HttpStatusCode.TemporaryRedirect
            || (int)statusCode == 308;
    }

    /// <summary>
    /// Determines the redirect target from the response headers.
    /// </summary>
    /// <param name="currentUri">URI of the request that was redirected; base for relative targets.</param>
    /// <param name="headers">Headers of the redirect response.</param>
    /// <returns>The absolute target URI, or <c>null</c> when no usable <c>Location</c> value exists.</returns>
    private static Uri ResolveRedirectUri(Uri currentUri, HttpResponseHeaders headers)
    {
        if (headers.Location != null)
        {
            return headers.Location.IsAbsoluteUri
                ? headers.Location
                : new Uri(currentUri, headers.Location);
        }

        // The typed property yielded nothing; fall back to the raw header text.
        if (!headers.TryGetValues("Location", out var values))
        {
            return null;
        }

        var rawLocation = values.FirstOrDefault();
        if (string.IsNullOrWhiteSpace(rawLocation))
        {
            return null;
        }

        if (Uri.TryCreate(rawLocation, UriKind.Absolute, out var absoluteUri))
        {
            return absoluteUri;
        }

        if (Uri.TryCreate(currentUri, rawLocation, out var relativeUri))
        {
            return relativeUri;
        }

        // Last resort: percent-escape the raw value and retry both interpretations.
        // NOTE(review): Uri.EscapeUriString is marked obsolete in recent .NET versions.
        var escaped = Uri.EscapeUriString(rawLocation);
        if (Uri.TryCreate(escaped, UriKind.Absolute, out absoluteUri))
        {
            return absoluteUri;
        }

        return Uri.TryCreate(currentUri, escaped, out relativeUri)
            ? relativeUri
            : null;
    }

    /// <summary>Disposes the underlying <see cref="HttpClient"/>.</summary>
    public void Dispose()
    {
        _httpClient.Dispose();
    }
}
|
||||
46
Services/PdfShellService.cs
Normal file
46
Services/PdfShellService.cs
Normal file
@@ -0,0 +1,46 @@
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using CRAWLER.Models;
|
||||
|
||||
namespace CRAWLER.Services;
|
||||
|
||||
/// <summary>
/// Opens PDF attachments and arbitrary URIs for the user.
/// </summary>
internal interface IPdfOpener
{
    /// <summary>Opens the document described by <paramref name="attachment"/>.</summary>
    /// <param name="attachment">Attachment to open.</param>
    void OpenAttachment(PdfAttachment attachment);

    /// <summary>Opens <paramref name="uri"/>.</summary>
    /// <param name="uri">Target to open.</param>
    void OpenUri(string uri);
}
|
||||
|
||||
/// <summary>
/// Opens attachments through the operating-system shell (<c>UseShellExecute = true</c>).
/// </summary>
internal sealed class PdfShellService : IPdfOpener
{
    /// <summary>
    /// Opens the attachment's local file when it exists; otherwise opens its source URL,
    /// provided that URL is an absolute http/https address.
    /// </summary>
    /// <param name="attachment">Attachment to open; <c>null</c> is ignored.</param>
    public void OpenAttachment(PdfAttachment attachment)
    {
        if (attachment == null)
        {
            return;
        }

        if (!string.IsNullOrWhiteSpace(attachment.LocalPath) && File.Exists(attachment.LocalPath))
        {
            OpenUri(attachment.LocalPath);
            return;
        }

        // SourceUrl comes from crawled data. Shell-executing an arbitrary string would launch
        // whatever program or file it names, so only web addresses are passed on.
        if (IsWebUrl(attachment.SourceUrl))
        {
            OpenUri(attachment.SourceUrl);
        }
    }

    /// <summary>
    /// Hands <paramref name="uri"/> to the shell. Null, empty or whitespace input is ignored.
    /// </summary>
    /// <param name="uri">File path or URL to open.</param>
    public void OpenUri(string uri)
    {
        if (string.IsNullOrWhiteSpace(uri))
        {
            return;
        }

        // Process.Start may return null; disposing only releases this process's handle to it.
        using var process = Process.Start(new ProcessStartInfo(uri)
        {
            UseShellExecute = true
        });
    }

    /// <summary>Returns <c>true</c> when <paramref name="value"/> is an absolute http or https URI.</summary>
    private static bool IsWebUrl(string value)
    {
        return System.Uri.TryCreate(value, System.UriKind.Absolute, out var parsed)
            && (parsed.Scheme == System.Uri.UriSchemeHttp || parsed.Scheme == System.Uri.UriSchemeHttps);
    }
}
|
||||
91
Services/PdfStorageService.cs
Normal file
91
Services/PdfStorageService.cs
Normal file
@@ -0,0 +1,91 @@
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using CRAWLER.Configuration;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
|
||||
namespace CRAWLER.Services;
|
||||
|
||||
/// <summary>
/// Stores PDF files under the folder configured as <c>Crawler:PdfStoragePath</c>,
/// one sub-folder per registry number.
/// </summary>
internal sealed class PdfStorageService
{
    private readonly KtoPoveritClient _client;
    private readonly string _rootPath;

    /// <summary>
    /// Reads the storage path from the <c>Crawler</c> section, expands environment variables in it
    /// and creates the root folder.
    /// </summary>
    /// <exception cref="InvalidOperationException">The <c>Crawler</c> section is missing.</exception>
    public PdfStorageService(IConfiguration configuration, KtoPoveritClient client)
    {
        _client = client;
        var options = configuration.GetSection("Crawler").Get<CrawlerOptions>()
            ?? throw new InvalidOperationException("Раздел Crawler не найден в appsettings.json.");
        _rootPath = Environment.ExpandEnvironmentVariables(options.PdfStoragePath);
        Directory.CreateDirectory(_rootPath);
    }

    /// <summary>
    /// Downloads <paramref name="sourceUrl"/> and saves it into the folder of
    /// <paramref name="registryNumber"/>.
    /// </summary>
    /// <param name="sourceUrl">URL to download.</param>
    /// <param name="registryNumber">Determines the sub-folder; blank values use <c>manual</c>.</param>
    /// <param name="title">Preferred file name; when blank the name is derived from the URL.</param>
    /// <param name="cancellationToken">Token observed during download and write.</param>
    /// <returns>Full path of the written file.</returns>
    public async Task<string> DownloadAsync(string sourceUrl, string registryNumber, string title, CancellationToken cancellationToken)
    {
        var bytes = await _client.GetBytesAsync(sourceUrl, cancellationToken);
        var fullPath = BuildTargetPath(registryNumber, title, sourceUrl);

        try
        {
            await File.WriteAllBytesAsync(fullPath, bytes, cancellationToken);
        }
        catch
        {
            // Do not leave a truncated PDF behind when the write fails or is cancelled.
            TryDelete(fullPath);
            throw;
        }

        return fullPath;
    }

    /// <summary>
    /// Copies a local file into the folder of <paramref name="registryNumber"/>.
    /// </summary>
    /// <param name="sourcePath">Existing file to copy.</param>
    /// <param name="registryNumber">Determines the sub-folder; blank values use <c>manual</c>.</param>
    /// <param name="title">Preferred file name; when blank the source file name is used.</param>
    /// <param name="cancellationToken">Token observed during the copy.</param>
    /// <returns>Full path of the copy.</returns>
    public async Task<string> CopyFromLocalAsync(string sourcePath, string registryNumber, string title, CancellationToken cancellationToken)
    {
        var fullPath = BuildTargetPath(registryNumber, title, sourcePath);

        try
        {
            await using var sourceStream = File.Open(sourcePath, FileMode.Open, FileAccess.Read, FileShare.Read);
            await using var targetStream = File.Create(fullPath);
            await sourceStream.CopyToAsync(targetStream, cancellationToken);
        }
        catch
        {
            // Both streams are closed by now; remove the partial copy before reporting the error.
            TryDelete(fullPath);
            throw;
        }

        return fullPath;
    }

    /// <summary>
    /// Deletes <paramref name="path"/> if it exists. Best effort: blank paths and any deletion
    /// error are ignored.
    /// </summary>
    public void TryDelete(string path)
    {
        try
        {
            if (!string.IsNullOrWhiteSpace(path) && File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch
        {
            // Intentionally ignored: a leftover file must not fail the calling operation.
        }
    }

    /// <summary>
    /// Picks a not-yet-existing <c>.pdf</c> path inside the registry-number folder, creating the
    /// folder when needed. Name clashes are resolved by appending <c>-2</c>, <c>-3</c>, ….
    /// </summary>
    private string BuildTargetPath(string registryNumber, string title, string sourceIdentity)
    {
        var safeFolder = MakeSafePathSegment(string.IsNullOrWhiteSpace(registryNumber) ? "manual" : registryNumber);
        var folder = Path.Combine(_rootPath, safeFolder);
        Directory.CreateDirectory(folder);

        // MakeSafePathSegment never returns a blank value, so no further fallback is needed.
        var baseName = MakeSafePathSegment(string.IsNullOrWhiteSpace(title) ? Path.GetFileNameWithoutExtension(sourceIdentity) : title);

        var candidate = Path.Combine(folder, baseName + ".pdf");
        for (var counter = 2; File.Exists(candidate); counter++)
        {
            candidate = Path.Combine(folder, $"{baseName}-{counter}.pdf");
        }

        return candidate;
    }

    /// <summary>
    /// Converts arbitrary text into a single safe file-system name.
    /// </summary>
    /// <param name="value">Raw text; may be <c>null</c>.</param>
    /// <returns>
    /// The text with invalid file-name characters replaced by <c>_</c> and without surrounding
    /// whitespace or trailing dots; <c>file</c> when nothing usable remains.
    /// </returns>
    private static string MakeSafePathSegment(string value)
    {
        var invalid = Path.GetInvalidFileNameChars();
        var cleaned = new string((value ?? string.Empty).Select(ch => invalid.Contains(ch) ? '_' : ch).ToArray())
            .Trim()
            // Trailing dots and spaces are dropped by Windows, and a segment made only of dots
            // ("." or "..") would make Path.Combine point at or above the storage root.
            .TrimEnd('.', ' ');
        return string.IsNullOrWhiteSpace(cleaned) ? "file" : cleaned;
    }
}
|
||||
55
Services/SqlServerConnectionFactory.cs
Normal file
55
Services/SqlServerConnectionFactory.cs
Normal file
@@ -0,0 +1,55 @@
|
||||
using CRAWLER.Configuration;
|
||||
using Microsoft.Data.SqlClient;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
|
||||
namespace CRAWLER.Services;
|
||||
|
||||
/// <summary>
/// Creates SQL Server connections and exposes the database options they are built from.
/// </summary>
internal interface IDatabaseConnectionFactory
{
    /// <summary>Options the connections are built from.</summary>
    DatabaseOptions Options { get; }

    /// <summary>Creates a new connection to the application database.</summary>
    SqlConnection CreateConnection();

    /// <summary>Creates a new connection to the server's <c>master</c> database.</summary>
    SqlConnection CreateMasterConnection();
}
|
||||
|
||||
/// <summary>
/// <see cref="IDatabaseConnectionFactory"/> that builds its connection strings from the
/// <c>Database</c> configuration section.
/// </summary>
internal sealed class SqlServerConnectionFactory : IDatabaseConnectionFactory
{
    /// <summary>
    /// Binds the <c>Database</c> section to <see cref="DatabaseOptions"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">The <c>Database</c> section is missing.</exception>
    public SqlServerConnectionFactory(IConfiguration configuration)
    {
        var boundOptions = configuration.GetSection("Database").Get<DatabaseOptions>();
        if (boundOptions == null)
        {
            throw new InvalidOperationException("Раздел Database не найден в appsettings.json.");
        }

        Options = boundOptions;
    }

    /// <summary>Options bound from configuration.</summary>
    public DatabaseOptions Options { get; }

    /// <summary>Creates an unopened connection whose initial catalog is <c>Options.Database</c>.</summary>
    public SqlConnection CreateConnection() => new SqlConnection(BuildConnectionString(Options.Database));

    /// <summary>Creates an unopened connection whose initial catalog is <c>master</c>.</summary>
    public SqlConnection CreateMasterConnection() => new SqlConnection(BuildConnectionString("master"));

    /// <summary>
    /// Assembles a connection string for <paramref name="databaseName"/> from <see cref="Options"/>.
    /// </summary>
    private string BuildConnectionString(string databaseName)
    {
        var settings = Options;
        var builder = new SqlConnectionStringBuilder();

        builder.ApplicationName = settings.ApplicationName;
        builder.DataSource = settings.Server;
        builder.InitialCatalog = databaseName;
        builder.ConnectTimeout = settings.ConnectTimeoutSeconds;
        builder.Encrypt = settings.Encrypt;
        builder.IntegratedSecurity = settings.IntegratedSecurity;
        builder.MultipleActiveResultSets = settings.MultipleActiveResultSets;
        builder.Pooling = settings.Pooling;
        builder.MaxPoolSize = settings.MaxPoolSize;
        builder.MinPoolSize = settings.MinPoolSize;
        builder.TrustServerCertificate = settings.TrustServerCertificate;
        builder.ConnectRetryCount = settings.ConnectRetryCount;
        builder.ConnectRetryInterval = settings.ConnectRetryIntervalSeconds;

        return builder.ConnectionString;
    }
}
|
||||
Reference in New Issue
Block a user