Добавьте файлы проекта.

This commit is contained in:
Курнат Андрей
2026-04-04 10:52:30 +03:00
parent 9b34a92f15
commit 5a55bc5f4c
30 changed files with 3446 additions and 0 deletions

View File

@@ -0,0 +1,141 @@
using Microsoft.Data.SqlClient;
namespace CRAWLER.Services;
internal sealed class DatabaseInitializer
{
private readonly IDatabaseConnectionFactory _connectionFactory;

/// <summary>
/// Creates an initializer that obtains all of its SQL connections from
/// <paramref name="connectionFactory"/>.
/// </summary>
/// <param name="connectionFactory">Factory for the master and application database connections.</param>
public DatabaseInitializer(IDatabaseConnectionFactory connectionFactory)
    => _connectionFactory = connectionFactory;
/// <summary>
/// Makes sure the database exists and then makes sure its tables and indexes exist.
/// </summary>
/// <param name="cancellationToken">Token forwarded to both initialization steps.</param>
public async Task EnsureCreatedAsync(CancellationToken cancellationToken)
{
    // Step 1: the database itself (executed through the master connection).
    await EnsureDatabaseExistsAsync(cancellationToken);

    // Step 2: tables and indexes (executed through the regular connection).
    await EnsureSchemaAsync(cancellationToken);
}
/// <summary>
/// Creates the configured database through the master connection when it does not exist yet.
/// </summary>
/// <param name="cancellationToken">Token used while opening the connection and executing the command.</param>
/// <exception cref="InvalidOperationException">
/// The configured database name is missing or longer than 128 characters.
/// </exception>
private async Task EnsureDatabaseExistsAsync(CancellationToken cancellationToken)
{
    var databaseName = _connectionFactory.Options.Database;

    // QUOTENAME returns NULL for input longer than 128 characters, which would turn the
    // dynamic statement below into a silent no-op, so reject such names up front.
    if (string.IsNullOrWhiteSpace(databaseName) || databaseName.Length > 128)
    {
        throw new InvalidOperationException("Имя базы данных не задано или длиннее 128 символов.");
    }

    // The name is passed as a parameter instead of being spliced into the SQL text:
    // DB_ID receives the raw name, and QUOTENAME produces a correctly escaped [identifier]
    // for CREATE DATABASE (which cannot take a parameter directly).
    const string sql = @"
IF DB_ID(@DatabaseName) IS NULL
BEGIN
DECLARE @CreateSql NVARCHAR(MAX) = N'CREATE DATABASE ' + QUOTENAME(@DatabaseName) + N';';
EXEC (@CreateSql);
END";

    await using var connection = _connectionFactory.CreateMasterConnection();
    await connection.OpenAsync(cancellationToken);
    await using var command = new SqlCommand(sql, connection)
    {
        CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds
    };
    command.Parameters.AddWithValue("@DatabaseName", databaseName);
    await command.ExecuteNonQueryAsync(cancellationToken);
}
/// <summary>
/// Creates the application tables and indexes when they are missing.
/// Every script is guarded by an existence check, and the scripts are executed
/// one by one, in array order, on a single connection.
/// </summary>
/// <param name="cancellationToken">Token used while opening the connection and executing each script.</param>
private async Task EnsureSchemaAsync(CancellationToken cancellationToken)
{
await using var connection = _connectionFactory.CreateConnection();
await connection.OpenAsync(cancellationToken);
// Order matters: each table is created before its indexes, and dbo.Instruments
// is created before dbo.PdfAttachments, which references it through a foreign key.
var scripts = new[]
{
// 1. dbo.Instruments table.
@"
IF OBJECT_ID(N'dbo.Instruments', N'U') IS NULL
BEGIN
CREATE TABLE dbo.Instruments
(
Id BIGINT IDENTITY(1,1) NOT NULL CONSTRAINT PK_Instruments PRIMARY KEY,
RegistryNumber NVARCHAR(64) NULL,
Name NVARCHAR(512) NOT NULL,
TypeDesignation NVARCHAR(512) NULL,
Manufacturer NVARCHAR(2000) NULL,
VerificationInterval NVARCHAR(512) NULL,
CertificateOrSerialNumber NVARCHAR(512) NULL,
AllowsBatchVerification NVARCHAR(256) NULL,
HasPeriodicVerification NVARCHAR(256) NULL,
TypeInfo NVARCHAR(256) NULL,
Purpose NVARCHAR(MAX) NULL,
Description NVARCHAR(MAX) NULL,
Software NVARCHAR(MAX) NULL,
MetrologicalCharacteristics NVARCHAR(MAX) NULL,
Completeness NVARCHAR(MAX) NULL,
Verification NVARCHAR(MAX) NULL,
RegulatoryDocuments NVARCHAR(MAX) NULL,
Applicant NVARCHAR(MAX) NULL,
TestCenter NVARCHAR(MAX) NULL,
DetailUrl NVARCHAR(1024) NULL,
SourceSystem NVARCHAR(64) NOT NULL CONSTRAINT DF_Instruments_SourceSystem DEFAULT N'Manual',
LastImportedAt DATETIME2 NULL,
CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_Instruments_CreatedAt DEFAULT SYSUTCDATETIME(),
UpdatedAt DATETIME2 NOT NULL CONSTRAINT DF_Instruments_UpdatedAt DEFAULT SYSUTCDATETIME()
);
END",
// 2. Unique index on RegistryNumber, filtered to non-NULL, non-empty values.
@"
IF NOT EXISTS (
SELECT 1
FROM sys.indexes
WHERE name = N'UX_Instruments_RegistryNumber'
AND object_id = OBJECT_ID(N'dbo.Instruments')
)
BEGIN
CREATE UNIQUE INDEX UX_Instruments_RegistryNumber
ON dbo.Instruments (RegistryNumber)
WHERE RegistryNumber IS NOT NULL AND RegistryNumber <> N'';
END",
// 3. dbo.PdfAttachments table; its rows are deleted together with the owning instrument (ON DELETE CASCADE).
@"
IF OBJECT_ID(N'dbo.PdfAttachments', N'U') IS NULL
BEGIN
CREATE TABLE dbo.PdfAttachments
(
Id BIGINT IDENTITY(1,1) NOT NULL CONSTRAINT PK_PdfAttachments PRIMARY KEY,
InstrumentId BIGINT NOT NULL,
Kind NVARCHAR(128) NOT NULL,
Title NVARCHAR(256) NULL,
SourceUrl NVARCHAR(1024) NULL,
LocalPath NVARCHAR(1024) NULL,
IsManual BIT NOT NULL CONSTRAINT DF_PdfAttachments_IsManual DEFAULT (0),
CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_PdfAttachments_CreatedAt DEFAULT SYSUTCDATETIME(),
CONSTRAINT FK_PdfAttachments_Instruments
FOREIGN KEY (InstrumentId) REFERENCES dbo.Instruments(Id)
ON DELETE CASCADE
);
END",
// 4. Non-unique index on (InstrumentId, CreatedAt DESC).
@"
IF NOT EXISTS (
SELECT 1
FROM sys.indexes
WHERE name = N'IX_PdfAttachments_InstrumentId'
AND object_id = OBJECT_ID(N'dbo.PdfAttachments')
)
BEGIN
CREATE INDEX IX_PdfAttachments_InstrumentId
ON dbo.PdfAttachments (InstrumentId, CreatedAt DESC);
END",
// 5. Unique index on (InstrumentId, SourceUrl), filtered to non-NULL, non-empty URLs.
// NOTE(review): SourceUrl is NVARCHAR(1024); confirm very long URLs fit the server's index key size limit.
@"
IF NOT EXISTS (
SELECT 1
FROM sys.indexes
WHERE name = N'UX_PdfAttachments_InstrumentId_SourceUrl'
AND object_id = OBJECT_ID(N'dbo.PdfAttachments')
)
BEGIN
CREATE UNIQUE INDEX UX_PdfAttachments_InstrumentId_SourceUrl
ON dbo.PdfAttachments (InstrumentId, SourceUrl)
WHERE SourceUrl IS NOT NULL AND SourceUrl <> N'';
END"
};
// One command per script, each with the configured command timeout.
foreach (var script in scripts)
{
await using var command = new SqlCommand(script, connection)
{
CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds
};
await command.ExecuteNonQueryAsync(cancellationToken);
}
}
}

View File

@@ -0,0 +1,26 @@
using Microsoft.Win32;
namespace CRAWLER.Services;
/// <summary>
/// Abstraction over a dialog that lets the user choose PDF files.
/// </summary>
internal interface IFilePickerService
{
/// <summary>
/// Asks the user to choose PDF files.
/// </summary>
/// <param name="multiselect">Whether more than one file may be selected.</param>
/// <returns>The selected file names; the FilePickerService implementation returns an empty list when the dialog is not confirmed.</returns>
IReadOnlyList<string> PickPdfFiles(bool multiselect);
}
/// <summary>
/// <see cref="IFilePickerService"/> implementation backed by <see cref="OpenFileDialog"/>.
/// </summary>
internal sealed class FilePickerService : IFilePickerService
{
    /// <summary>
    /// Shows an open-file dialog restricted to *.pdf files.
    /// </summary>
    /// <param name="multiselect">Whether the dialog allows selecting several files.</param>
    /// <returns>The chosen file names, or an empty list when the dialog was not confirmed.</returns>
    public IReadOnlyList<string> PickPdfFiles(bool multiselect)
    {
        var picker = new OpenFileDialog();
        picker.Filter = "PDF (*.pdf)|*.pdf";
        picker.Multiselect = multiselect;
        picker.CheckFileExists = true;
        picker.CheckPathExists = true;

        // ShowDialog returns a nullable bool; only an explicit "true" counts as confirmation.
        if (picker.ShowDialog() == true)
        {
            return picker.FileNames;
        }

        return Array.Empty<string>();
    }
}

View File

@@ -0,0 +1,306 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using CRAWLER.Models;
using CRAWLER.Parsing;
namespace CRAWLER.Services;
internal sealed class InstrumentCatalogService
{
private readonly CatalogPageParser _catalogPageParser;
private readonly DatabaseInitializer _databaseInitializer;
private readonly DetailPageParser _detailPageParser;
private readonly InstrumentRepository _repository;
private readonly KtoPoveritClient _client;
private readonly PdfStorageService _pdfStorageService;

/// <summary>
/// Wires the service to its collaborators: database initializer, repository,
/// the two HTML parsers, the site client and the PDF storage.
/// </summary>
public InstrumentCatalogService(
    DatabaseInitializer databaseInitializer,
    InstrumentRepository repository,
    CatalogPageParser catalogPageParser,
    DetailPageParser detailPageParser,
    KtoPoveritClient client,
    PdfStorageService pdfStorageService)
{
    // Persistence.
    _databaseInitializer = databaseInitializer;
    _repository = repository;
    _pdfStorageService = pdfStorageService;

    // Site access and parsing.
    _client = client;
    _catalogPageParser = catalogPageParser;
    _detailPageParser = detailPageParser;
}
/// <summary>
/// Configured default number of catalog pages to scan, never less than 1.
/// </summary>
public int DefaultPagesToScan => Math.Max(1, _client.Options.DefaultPagesToScan);
/// <summary>
/// Prepares the database by delegating to the database initializer.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the initializer.</param>
public async Task InitializeAsync(CancellationToken cancellationToken)
    => await _databaseInitializer.EnsureCreatedAsync(cancellationToken);
/// <summary>
/// Searches stored instruments; the call is forwarded to the repository unchanged.
/// </summary>
/// <param name="searchText">Text to search for.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
public Task<IReadOnlyList<InstrumentSummary>> SearchAsync(string searchText, CancellationToken cancellationToken)
    => _repository.SearchAsync(searchText, cancellationToken);
/// <summary>
/// Loads one instrument by identifier; the call is forwarded to the repository unchanged.
/// </summary>
/// <param name="id">Instrument identifier.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
public Task<InstrumentRecord> GetByIdAsync(long id, CancellationToken cancellationToken)
    => _repository.GetByIdAsync(id, cancellationToken);
/// <summary>
/// Saves the instrument and then attaches the given local PDF files to it as manual attachments.
/// </summary>
/// <param name="record">Instrument to insert or update.</param>
/// <param name="pendingPdfPaths">Local PDF paths to attach; may be null, blank entries are ignored.</param>
/// <param name="cancellationToken">Token forwarded to storage and repository calls.</param>
/// <returns>Identifier of the saved instrument.</returns>
public async Task<long> SaveInstrumentAsync(InstrumentRecord record, IEnumerable<string> pendingPdfPaths, CancellationToken cancellationToken)
{
    var instrumentId = await _repository.SaveAsync(record, cancellationToken);

    // Copying the files and storing the attachment rows is exactly what
    // AddManualAttachmentsAsync does (including the null / blank-path handling).
    await AddManualAttachmentsAsync(instrumentId, record.RegistryNumber, pendingPdfPaths, cancellationToken);

    return instrumentId;
}
/// <summary>
/// Deletes the instrument row and then removes the local files of its attachments.
/// Does nothing when <paramref name="record"/> is null.
/// </summary>
/// <param name="record">Instrument to delete; its <c>Attachments</c> list names the files to remove.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
public async Task DeleteInstrumentAsync(InstrumentRecord record, CancellationToken cancellationToken)
{
    if (record == null)
    {
        return;
    }

    // Delete from the database first: if that fails, the files are still on disk and the
    // record stays consistent. Deleting files first could leave rows pointing at missing files.
    await _repository.DeleteInstrumentAsync(record.Id, cancellationToken);

    if (record.Attachments == null)
    {
        return;
    }

    foreach (var attachment in record.Attachments)
    {
        if (attachment == null)
        {
            continue;
        }

        _pdfStorageService.TryDelete(attachment.LocalPath);
    }
}
/// <summary>
/// Deletes the attachment row and then removes its local file.
/// Does nothing when <paramref name="attachment"/> is null.
/// </summary>
/// <param name="attachment">Attachment to remove.</param>
/// <param name="cancellationToken">Token forwarded to the repository.</param>
public async Task RemoveAttachmentAsync(PdfAttachment attachment, CancellationToken cancellationToken)
{
    if (attachment == null)
    {
        return;
    }

    // Row first, file second: a failed database call must not leave a row
    // that points at a file which has already been deleted.
    await _repository.DeleteAttachmentAsync(attachment.Id, cancellationToken);
    _pdfStorageService.TryDelete(attachment.LocalPath);
}
/// <summary>
/// Copies the given local PDF files into storage and records each one as a manual attachment.
/// </summary>
/// <param name="instrumentId">Instrument that receives the attachments.</param>
/// <param name="registryNumber">Registry number passed to the storage service together with the file title.</param>
/// <param name="sourcePaths">Local file paths; may be null, blank entries are skipped.</param>
/// <param name="cancellationToken">Token forwarded to storage and repository calls.</param>
/// <returns>The attachments that were saved, in input order; empty when <paramref name="sourcePaths"/> is null.</returns>
public async Task<IReadOnlyList<PdfAttachment>> AddManualAttachmentsAsync(long instrumentId, string registryNumber, IEnumerable<string> sourcePaths, CancellationToken cancellationToken)
{
    if (sourcePaths == null)
    {
        return Array.Empty<PdfAttachment>();
    }

    var saved = new List<PdfAttachment>();
    foreach (var path in sourcePaths)
    {
        if (string.IsNullOrWhiteSpace(path))
        {
            continue;
        }

        // The file name without extension doubles as the attachment title.
        var title = Path.GetFileNameWithoutExtension(path);
        var storedPath = await _pdfStorageService.CopyFromLocalAsync(path, registryNumber, title, cancellationToken);

        var manualAttachment = new PdfAttachment
        {
            InstrumentId = instrumentId,
            Kind = "Ручной PDF",
            Title = title,
            SourceUrl = null,
            LocalPath = storedPath,
            IsManual = true
        };

        await _repository.SaveAttachmentAsync(manualAttachment, cancellationToken);
        saved.Add(manualAttachment);
    }

    return saved;
}
/// <summary>
/// Reads catalog pages 1..<paramref name="pagesToScan"/> from the site, merges every listed
/// instrument into the database and synchronizes its PDF attachments.
/// Failures of a single page or a single item are counted in the result and do not stop the run.
/// </summary>
/// <param name="pagesToScan">Number of catalog pages to read; values below 1 are treated as 1.</param>
/// <param name="progress">Optional sink for human-readable progress messages.</param>
/// <param name="cancellationToken">Token that stops the synchronization.</param>
/// <returns>Counters describing what was scanned, added, updated and what failed.</returns>
/// <exception cref="OperationCanceledException">Cancellation was requested through <paramref name="cancellationToken"/>.</exception>
public async Task<SyncResult> SyncFromSiteAsync(int pagesToScan, IProgress<string> progress, CancellationToken cancellationToken)
{
    var result = new SyncResult();
    var totalPages = Math.Max(1, pagesToScan);
    for (var page = 1; page <= totalPages; page++)
    {
        cancellationToken.ThrowIfCancellationRequested();
        progress?.Report($"Чтение страницы {page}...");
        IReadOnlyList<CatalogListItem> items;
        try
        {
            var catalogHtml = await _client.GetStringAsync(_client.BuildCatalogPageUrl(page), cancellationToken);
            items = _catalogPageParser.Parse(catalogHtml, _client.Options.BaseUrl);
            result.PagesScanned++;
        }
        catch (Exception ex)
        {
            // A requested cancellation must end the run instead of being reported as a skipped page.
            cancellationToken.ThrowIfCancellationRequested();
            result.FailedPages++;
            progress?.Report($"Страница {page} пропущена: {ex.Message}");
            continue;
        }

        foreach (var item in items)
        {
            cancellationToken.ThrowIfCancellationRequested();
            progress?.Report($"Обработка {item.RegistryNumber ?? item.Name}...");
            try
            {
                await SyncCatalogItemAsync(item, result, cancellationToken);
            }
            catch (Exception ex)
            {
                // Same rule per item: without this check a cancellation during the last item
                // would be swallowed and the method would report a normal completion.
                cancellationToken.ThrowIfCancellationRequested();
                result.FailedItems++;
                progress?.Report($"Запись {item.RegistryNumber ?? item.Name} пропущена: {ex.Message}");
            }
        }
    }

    progress?.Report($"Готово: страниц {result.PagesScanned}, записей {result.ProcessedItems}, проблемных записей {result.FailedItems}.");
    return result;
}

/// <summary>
/// Processes one catalog entry: loads the stored record (if any), fetches the detail page,
/// saves the merged record, synchronizes both PDF attachments and applies the request delay.
/// </summary>
private async Task SyncCatalogItemAsync(CatalogListItem item, SyncResult result, CancellationToken cancellationToken)
{
    var existingId = await _repository.FindInstrumentIdByRegistryNumberAsync(item.RegistryNumber, cancellationToken);
    var existing = existingId.HasValue
        ? await _repository.GetByIdAsync(existingId.Value, cancellationToken)
        : null;

    ParsedInstrumentDetails details = null;
    if (!string.IsNullOrWhiteSpace(item.DetailUrl))
    {
        try
        {
            var detailHtml = await _client.GetStringAsync(item.DetailUrl, cancellationToken);
            details = _detailPageParser.Parse(detailHtml, _client.Options.BaseUrl);
        }
        catch
        {
            // The detail page is optional, but cancellation is not a "skipped detail request".
            cancellationToken.ThrowIfCancellationRequested();
            result.SkippedDetailRequests++;
        }
    }

    var merged = Merge(existing, item, details);
    merged.Id = existing?.Id ?? 0;
    merged.SourceSystem = "KtoPoverit";
    merged.DetailUrl = item.DetailUrl ?? existing?.DetailUrl;
    merged.LastImportedAt = DateTime.UtcNow;

    var savedId = await _repository.SaveAsync(merged, cancellationToken);
    result.ProcessedItems++;
    if (existing == null)
    {
        result.AddedRecords++;
    }
    else
    {
        result.UpdatedRecords++;
    }

    await SyncAttachmentAsync(savedId, merged.RegistryNumber, "Описание типа", details?.DescriptionTypePdfUrl ?? item.DescriptionTypePdfUrl, result, cancellationToken);
    await SyncAttachmentAsync(savedId, merged.RegistryNumber, "Методика поверки", details?.MethodologyPdfUrl ?? item.MethodologyPdfUrl, result, cancellationToken);

    // Optional pause between items, as configured in the client options.
    if (_client.Options.RequestDelayMilliseconds > 0)
    {
        await Task.Delay(_client.Options.RequestDelayMilliseconds, cancellationToken);
    }
}
/// <summary>
/// Ensures the PDF behind <paramref name="sourceUrl"/> is downloaded and recorded for the instrument.
/// Nothing happens when the URL is blank or when the recorded local file still exists.
/// When the download fails and no row exists yet, a row without a local path is stored instead.
/// </summary>
/// <param name="instrumentId">Instrument that owns the attachment.</param>
/// <param name="registryNumber">Registry number passed to the storage service.</param>
/// <param name="title">Value stored as both kind and title of the attachment.</param>
/// <param name="sourceUrl">Remote URL of the PDF.</param>
/// <param name="result">Counters updated with the download outcome.</param>
/// <param name="cancellationToken">Token that stops the operation.</param>
/// <exception cref="OperationCanceledException">Cancellation was requested through <paramref name="cancellationToken"/>.</exception>
private async Task SyncAttachmentAsync(long instrumentId, string registryNumber, string title, string sourceUrl, SyncResult result, CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(sourceUrl))
    {
        return;
    }

    var existing = await _repository.FindAttachmentBySourceUrlAsync(instrumentId, sourceUrl, cancellationToken);
    if (existing != null && !string.IsNullOrWhiteSpace(existing.LocalPath) && File.Exists(existing.LocalPath))
    {
        return;
    }

    try
    {
        var localPath = await _pdfStorageService.DownloadAsync(sourceUrl, registryNumber, title, cancellationToken);
        var attachment = existing ?? new PdfAttachment
        {
            InstrumentId = instrumentId,
            IsManual = false
        };
        attachment.Kind = title;
        attachment.Title = title;
        attachment.SourceUrl = sourceUrl;
        attachment.LocalPath = localPath;
        await _repository.SaveAttachmentAsync(attachment, cancellationToken);
        result.DownloadedPdfFiles++;
    }
    catch (Exception)
    {
        // Cancellation is not a failed download: propagate it instead of counting it
        // and attempting a placeholder insert with an already cancelled token.
        cancellationToken.ThrowIfCancellationRequested();

        result.FailedPdfFiles++;
        if (existing == null)
        {
            // Remember the URL even though the file is missing, so the link is not lost.
            await _repository.SaveAttachmentAsync(new PdfAttachment
            {
                InstrumentId = instrumentId,
                Kind = title,
                Title = title,
                SourceUrl = sourceUrl,
                LocalPath = null,
                IsManual = false
            }, cancellationToken);
        }
    }
}
/// <summary>
/// Builds the record to save from up to three sources. For every field the first non-blank
/// value wins, in the order: detail page, catalog list item, stored record.
/// Fields that the catalog list does not carry use only the detail page and the stored record.
/// </summary>
/// <param name="existing">Stored record, or null for a new instrument; it is cloned, not modified.</param>
/// <param name="item">Catalog list entry; may be null.</param>
/// <param name="details">Parsed detail page; may be null.</param>
/// <returns>A new or cloned record with the merged field values.</returns>
private static InstrumentRecord Merge(InstrumentRecord existing, CatalogListItem item, ParsedInstrumentDetails details)
{
    var merged = existing?.Clone();
    merged ??= new InstrumentRecord();

    // Fields available from all three sources.
    merged.RegistryNumber = Prefer(details?.RegistryNumber, item?.RegistryNumber, existing?.RegistryNumber);
    var name = Prefer(details?.Name, item?.Name, existing?.Name);
    merged.Name = name ?? "Без названия";
    merged.TypeDesignation = Prefer(details?.TypeDesignation, item?.TypeDesignation, existing?.TypeDesignation);
    merged.Manufacturer = Prefer(details?.Manufacturer, item?.Manufacturer, existing?.Manufacturer);
    merged.VerificationInterval = Prefer(details?.VerificationInterval, item?.VerificationInterval, existing?.VerificationInterval);
    merged.CertificateOrSerialNumber = Prefer(details?.CertificateOrSerialNumber, item?.CertificateOrSerialNumber, existing?.CertificateOrSerialNumber);

    // Fields that only the detail page (or the stored record) provides.
    merged.AllowsBatchVerification = Prefer(details?.AllowsBatchVerification, existing?.AllowsBatchVerification);
    merged.HasPeriodicVerification = Prefer(details?.HasPeriodicVerification, existing?.HasPeriodicVerification);
    merged.TypeInfo = Prefer(details?.TypeInfo, existing?.TypeInfo);
    merged.Purpose = Prefer(details?.Purpose, existing?.Purpose);
    merged.Description = Prefer(details?.Description, existing?.Description);
    merged.Software = Prefer(details?.Software, existing?.Software);
    merged.MetrologicalCharacteristics = Prefer(details?.MetrologicalCharacteristics, existing?.MetrologicalCharacteristics);
    merged.Completeness = Prefer(details?.Completeness, existing?.Completeness);
    merged.Verification = Prefer(details?.Verification, existing?.Verification);
    merged.RegulatoryDocuments = Prefer(details?.RegulatoryDocuments, existing?.RegulatoryDocuments);
    merged.Applicant = Prefer(details?.Applicant, existing?.Applicant);
    merged.TestCenter = Prefer(details?.TestCenter, existing?.TestCenter);

    return merged;
}
/// <summary>
/// Returns the first candidate that is not null, empty or whitespace, trimmed.
/// </summary>
/// <param name="values">Candidates in priority order.</param>
/// <returns>The trimmed winner, or null when every candidate is blank.</returns>
private static string Prefer(params string[] values)
{
    for (var index = 0; index < values.Length; index++)
    {
        var candidate = values[index];
        if (string.IsNullOrWhiteSpace(candidate))
        {
            continue;
        }

        return candidate.Trim();
    }

    return null;
}
}

View File

@@ -0,0 +1,526 @@
using CRAWLER.Models;
using Microsoft.Data.SqlClient;
namespace CRAWLER.Services;
internal sealed class InstrumentRepository
{
private readonly IDatabaseConnectionFactory _connectionFactory;

/// <summary>
/// Creates a repository that opens its SQL connections through <paramref name="connectionFactory"/>.
/// </summary>
/// <param name="connectionFactory">Factory for connections and source of the command timeout.</param>
public InstrumentRepository(IDatabaseConnectionFactory connectionFactory)
    => _connectionFactory = connectionFactory;
/// <summary>
/// Returns up to 500 instrument summaries whose registry number, name, type designation or
/// manufacturer contains <paramref name="searchText"/>. A blank search text returns the first
/// 500 rows. Rows with a registry number come first, ordered by registry number descending.
/// </summary>
/// <param name="searchText">Text to look for; it is trimmed and matched literally.</param>
/// <param name="cancellationToken">Token used for the connection, the command and the reader.</param>
/// <returns>The matching summaries in query order.</returns>
public async Task<IReadOnlyList<InstrumentSummary>> SearchAsync(string searchText, CancellationToken cancellationToken)
{
    var items = new List<InstrumentSummary>();
    var hasFilter = !string.IsNullOrWhiteSpace(searchText);
    const string sql = @"
SELECT TOP (500)
Id,
RegistryNumber,
Name,
TypeDesignation,
Manufacturer,
VerificationInterval,
SourceSystem,
UpdatedAt
FROM dbo.Instruments
WHERE @Search IS NULL
OR RegistryNumber LIKE @Like
OR Name LIKE @Like
OR TypeDesignation LIKE @Like
OR Manufacturer LIKE @Like
ORDER BY
CASE WHEN RegistryNumber IS NULL OR RegistryNumber = N'' THEN 1 ELSE 0 END,
RegistryNumber DESC,
UpdatedAt DESC;";

    object searchValue = DBNull.Value;
    object likeValue = DBNull.Value;
    if (hasFilter)
    {
        var term = searchText.Trim();
        searchValue = term;
        // Escape LIKE metacharacters so that what the user typed is matched literally.
        likeValue = $"%{EscapeLikePattern(term)}%";
    }

    await using var connection = _connectionFactory.CreateConnection();
    await connection.OpenAsync(cancellationToken);
    await using var command = new SqlCommand(sql, connection)
    {
        CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds
    };
    command.Parameters.AddWithValue("@Search", searchValue);
    command.Parameters.AddWithValue("@Like", likeValue);

    await using var reader = await command.ExecuteReaderAsync(cancellationToken);
    while (await reader.ReadAsync(cancellationToken))
    {
        items.Add(new InstrumentSummary
        {
            Id = reader.GetInt64(0),
            RegistryNumber = GetString(reader, 1),
            Name = GetString(reader, 2),
            TypeDesignation = GetString(reader, 3),
            Manufacturer = GetString(reader, 4),
            VerificationInterval = GetString(reader, 5),
            SourceSystem = GetString(reader, 6),
            UpdatedAt = reader.GetDateTime(7)
        });
    }

    return items;
}

/// <summary>
/// Wraps the LIKE wildcard characters '[', '%' and '_' in brackets so they match themselves.
/// '[' is handled first so the brackets added for the other two are not escaped again.
/// </summary>
private static string EscapeLikePattern(string value)
{
    return value
        .Replace("[", "[[]")
        .Replace("%", "[%]")
        .Replace("_", "[_]");
}
/// <summary>
/// Loads one instrument with all of its columns and its attachments.
/// </summary>
/// <param name="id">Instrument identifier.</param>
/// <param name="cancellationToken">Token used for the connection, the commands and the readers.</param>
/// <returns>The instrument, or null when no row has this identifier.</returns>
public async Task<InstrumentRecord> GetByIdAsync(long id, CancellationToken cancellationToken)
{
    const string sql = @"
SELECT
Id,
RegistryNumber,
Name,
TypeDesignation,
Manufacturer,
VerificationInterval,
CertificateOrSerialNumber,
AllowsBatchVerification,
HasPeriodicVerification,
TypeInfo,
Purpose,
Description,
Software,
MetrologicalCharacteristics,
Completeness,
Verification,
RegulatoryDocuments,
Applicant,
TestCenter,
DetailUrl,
SourceSystem,
LastImportedAt,
CreatedAt,
UpdatedAt
FROM dbo.Instruments
WHERE Id = @Id;";

    await using var connection = _connectionFactory.CreateConnection();
    await connection.OpenAsync(cancellationToken);

    await using var command = new SqlCommand(sql, connection);
    command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;
    command.Parameters.AddWithValue("@Id", id);

    InstrumentRecord record;

    // The reader is scoped to this block so it is closed before the attachments
    // query runs on the same connection.
    await using (var row = await command.ExecuteReaderAsync(cancellationToken))
    {
        var found = await row.ReadAsync(cancellationToken);
        if (!found)
        {
            return null;
        }

        // Ordinals follow the column order of the SELECT list above.
        record = new InstrumentRecord
        {
            Id = row.GetInt64(0),
            RegistryNumber = GetString(row, 1),
            Name = GetString(row, 2),
            TypeDesignation = GetString(row, 3),
            Manufacturer = GetString(row, 4),
            VerificationInterval = GetString(row, 5),
            CertificateOrSerialNumber = GetString(row, 6),
            AllowsBatchVerification = GetString(row, 7),
            HasPeriodicVerification = GetString(row, 8),
            TypeInfo = GetString(row, 9),
            Purpose = GetString(row, 10),
            Description = GetString(row, 11),
            Software = GetString(row, 12),
            MetrologicalCharacteristics = GetString(row, 13),
            Completeness = GetString(row, 14),
            Verification = GetString(row, 15),
            RegulatoryDocuments = GetString(row, 16),
            Applicant = GetString(row, 17),
            TestCenter = GetString(row, 18),
            DetailUrl = GetString(row, 19),
            SourceSystem = GetString(row, 20),
            LastImportedAt = row.IsDBNull(21) ? (DateTime?)null : row.GetDateTime(21),
            CreatedAt = row.GetDateTime(22),
            UpdatedAt = row.GetDateTime(23)
        };
    }

    var attachments = await GetAttachmentsAsync(connection, id, cancellationToken);
    record.Attachments = attachments.ToList();
    return record;
}
/// <summary>
/// Looks up the identifier of the instrument with the given registry number.
/// </summary>
/// <param name="registryNumber">Registry number; it is trimmed before the lookup.</param>
/// <param name="cancellationToken">Token used for the connection and the command.</param>
/// <returns>The identifier, or null when the number is blank or no row matches.</returns>
public async Task<long?> FindInstrumentIdByRegistryNumberAsync(string registryNumber, CancellationToken cancellationToken)
{
    // A blank number can never match, so skip the round trip.
    if (string.IsNullOrWhiteSpace(registryNumber))
    {
        return null;
    }

    const string sql = "SELECT Id FROM dbo.Instruments WHERE RegistryNumber = @RegistryNumber;";

    await using var connection = _connectionFactory.CreateConnection();
    await connection.OpenAsync(cancellationToken);

    await using var command = new SqlCommand(sql, connection);
    command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;
    command.Parameters.AddWithValue("@RegistryNumber", registryNumber.Trim());

    var scalar = await command.ExecuteScalarAsync(cancellationToken);
    var hasValue = scalar != null && scalar != DBNull.Value;
    if (hasValue)
    {
        return Convert.ToInt64(scalar);
    }

    return null;
}
/// <summary>
/// Inserts the record when its Id is not positive, otherwise updates the row with that Id.
/// CreatedAt and UpdatedAt are assigned by the server (SYSUTCDATETIME).
/// </summary>
/// <param name="record">Instrument to persist.</param>
/// <param name="cancellationToken">Token used for the connection and the command.</param>
/// <returns>The generated Id for an insert, or the record's own Id for an update.</returns>
/// <exception cref="ArgumentNullException"><paramref name="record"/> is null.</exception>
public async Task<long> SaveAsync(InstrumentRecord record, CancellationToken cancellationToken)
{
    if (record == null)
    {
        throw new ArgumentNullException(nameof(record));
    }

    const string insertSql = @"
INSERT INTO dbo.Instruments
(
RegistryNumber,
Name,
TypeDesignation,
Manufacturer,
VerificationInterval,
CertificateOrSerialNumber,
AllowsBatchVerification,
HasPeriodicVerification,
TypeInfo,
Purpose,
Description,
Software,
MetrologicalCharacteristics,
Completeness,
Verification,
RegulatoryDocuments,
Applicant,
TestCenter,
DetailUrl,
SourceSystem,
LastImportedAt,
CreatedAt,
UpdatedAt
)
OUTPUT INSERTED.Id
VALUES
(
@RegistryNumber,
@Name,
@TypeDesignation,
@Manufacturer,
@VerificationInterval,
@CertificateOrSerialNumber,
@AllowsBatchVerification,
@HasPeriodicVerification,
@TypeInfo,
@Purpose,
@Description,
@Software,
@MetrologicalCharacteristics,
@Completeness,
@Verification,
@RegulatoryDocuments,
@Applicant,
@TestCenter,
@DetailUrl,
@SourceSystem,
@LastImportedAt,
SYSUTCDATETIME(),
SYSUTCDATETIME()
);";

    const string updateSql = @"
UPDATE dbo.Instruments
SET
RegistryNumber = @RegistryNumber,
Name = @Name,
TypeDesignation = @TypeDesignation,
Manufacturer = @Manufacturer,
VerificationInterval = @VerificationInterval,
CertificateOrSerialNumber = @CertificateOrSerialNumber,
AllowsBatchVerification = @AllowsBatchVerification,
HasPeriodicVerification = @HasPeriodicVerification,
TypeInfo = @TypeInfo,
Purpose = @Purpose,
Description = @Description,
Software = @Software,
MetrologicalCharacteristics = @MetrologicalCharacteristics,
Completeness = @Completeness,
Verification = @Verification,
RegulatoryDocuments = @RegulatoryDocuments,
Applicant = @Applicant,
TestCenter = @TestCenter,
DetailUrl = @DetailUrl,
SourceSystem = @SourceSystem,
LastImportedAt = @LastImportedAt,
UpdatedAt = SYSUTCDATETIME()
WHERE Id = @Id;";

    await using var connection = _connectionFactory.CreateConnection();
    await connection.OpenAsync(cancellationToken);

    var isNew = record.Id <= 0;
    await using var command = CreateRecordCommand(isNew ? insertSql : updateSql, connection, record);

    if (isNew)
    {
        // OUTPUT INSERTED.Id makes the insert return the generated identity value.
        var generatedId = await command.ExecuteScalarAsync(cancellationToken);
        return Convert.ToInt64(generatedId);
    }

    command.Parameters.AddWithValue("@Id", record.Id);
    await command.ExecuteNonQueryAsync(cancellationToken);
    return record.Id;
}
/// <summary>
/// Finds the attachment of an instrument that was recorded for the given source URL.
/// </summary>
/// <param name="instrumentId">Owning instrument.</param>
/// <param name="sourceUrl">Source URL to match exactly as passed (it is not trimmed).</param>
/// <param name="cancellationToken">Token used for the connection, the command and the reader.</param>
/// <returns>The first matching attachment, or null when the URL is blank or nothing matches.</returns>
public async Task<PdfAttachment> FindAttachmentBySourceUrlAsync(long instrumentId, string sourceUrl, CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(sourceUrl))
    {
        return null;
    }

    const string sql = @"
SELECT
Id,
InstrumentId,
Kind,
Title,
SourceUrl,
LocalPath,
IsManual,
CreatedAt
FROM dbo.PdfAttachments
WHERE InstrumentId = @InstrumentId
AND SourceUrl = @SourceUrl;";

    await using var connection = _connectionFactory.CreateConnection();
    await connection.OpenAsync(cancellationToken);

    await using var command = new SqlCommand(sql, connection);
    command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;
    command.Parameters.AddWithValue("@InstrumentId", instrumentId);
    command.Parameters.AddWithValue("@SourceUrl", sourceUrl);

    await using var row = await command.ExecuteReaderAsync(cancellationToken);
    var found = await row.ReadAsync(cancellationToken);
    if (found)
    {
        // Ordinals follow the column order of the SELECT list above.
        return new PdfAttachment
        {
            Id = row.GetInt64(0),
            InstrumentId = row.GetInt64(1),
            Kind = GetString(row, 2),
            Title = GetString(row, 3),
            SourceUrl = GetString(row, 4),
            LocalPath = GetString(row, 5),
            IsManual = row.GetBoolean(6),
            CreatedAt = row.GetDateTime(7)
        };
    }

    return null;
}
/// <summary>
/// Inserts the attachment when its Id is not positive, otherwise updates the row with that Id.
/// After an insert the generated identifier is written back to <paramref name="attachment"/>,
/// so the same object can later be updated or deleted by Id.
/// </summary>
/// <param name="attachment">Attachment to persist; its Id is assigned on insert.</param>
/// <param name="cancellationToken">Token used for the connection and the command.</param>
/// <exception cref="ArgumentNullException"><paramref name="attachment"/> is null.</exception>
public async Task SaveAttachmentAsync(PdfAttachment attachment, CancellationToken cancellationToken)
{
    if (attachment == null)
    {
        throw new ArgumentNullException(nameof(attachment));
    }

    await using var connection = _connectionFactory.CreateConnection();
    await connection.OpenAsync(cancellationToken);

    if (attachment.Id <= 0)
    {
        // OUTPUT INSERTED.Id mirrors the instrument insert and returns the new identity value.
        const string insertSql = @"
INSERT INTO dbo.PdfAttachments
(
InstrumentId,
Kind,
Title,
SourceUrl,
LocalPath,
IsManual,
CreatedAt
)
OUTPUT INSERTED.Id
VALUES
(
@InstrumentId,
@Kind,
@Title,
@SourceUrl,
@LocalPath,
@IsManual,
SYSUTCDATETIME()
);";
        await using var insertCommand = CreateAttachmentCommand(insertSql, connection, attachment);
        var generatedId = await insertCommand.ExecuteScalarAsync(cancellationToken);

        // Without this the caller keeps an object with Id 0, and a later delete by Id
        // would remove the file but leave the row behind.
        attachment.Id = Convert.ToInt64(generatedId);
        return;
    }

    const string updateSql = @"
UPDATE dbo.PdfAttachments
SET
Kind = @Kind,
Title = @Title,
SourceUrl = @SourceUrl,
LocalPath = @LocalPath,
IsManual = @IsManual
WHERE Id = @Id;";
    await using (var updateCommand = CreateAttachmentCommand(updateSql, connection, attachment))
    {
        updateCommand.Parameters.AddWithValue("@Id", attachment.Id);
        await updateCommand.ExecuteNonQueryAsync(cancellationToken);
    }
}
/// <summary>
/// Deletes the attachment row with the given identifier.
/// </summary>
/// <param name="attachmentId">Identifier of the attachment row.</param>
/// <param name="cancellationToken">Token used for the connection and the command.</param>
public async Task DeleteAttachmentAsync(long attachmentId, CancellationToken cancellationToken)
{
    const string sql = "DELETE FROM dbo.PdfAttachments WHERE Id = @Id;";

    await using (var connection = _connectionFactory.CreateConnection())
    {
        await connection.OpenAsync(cancellationToken);
        await using (var command = new SqlCommand(sql, connection))
        {
            command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;
            command.Parameters.AddWithValue("@Id", attachmentId);
            await command.ExecuteNonQueryAsync(cancellationToken);
        }
    }
}
/// <summary>
/// Deletes the instrument row with the given identifier.
/// </summary>
/// <param name="id">Identifier of the instrument row.</param>
/// <param name="cancellationToken">Token used for the connection and the command.</param>
public async Task DeleteInstrumentAsync(long id, CancellationToken cancellationToken)
{
    const string sql = "DELETE FROM dbo.Instruments WHERE Id = @Id;";

    await using (var connection = _connectionFactory.CreateConnection())
    {
        await connection.OpenAsync(cancellationToken);
        await using (var command = new SqlCommand(sql, connection))
        {
            command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;
            command.Parameters.AddWithValue("@Id", id);
            await command.ExecuteNonQueryAsync(cancellationToken);
        }
    }
}
/// <summary>
/// Reads all attachments of an instrument on an already open connection, newest first.
/// </summary>
/// <param name="connection">Open connection to run the query on; it is not disposed here.</param>
/// <param name="instrumentId">Owning instrument.</param>
/// <param name="cancellationToken">Token used for the command and the reader.</param>
/// <returns>The attachments ordered by CreatedAt and Id, both descending.</returns>
private async Task<IReadOnlyList<PdfAttachment>> GetAttachmentsAsync(SqlConnection connection, long instrumentId, CancellationToken cancellationToken)
{
    const string sql = @"
SELECT
Id,
InstrumentId,
Kind,
Title,
SourceUrl,
LocalPath,
IsManual,
CreatedAt
FROM dbo.PdfAttachments
WHERE InstrumentId = @InstrumentId
ORDER BY CreatedAt DESC, Id DESC;";

    await using var command = new SqlCommand(sql, connection);
    command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;
    command.Parameters.AddWithValue("@InstrumentId", instrumentId);

    var attachments = new List<PdfAttachment>();
    await using var rows = await command.ExecuteReaderAsync(cancellationToken);
    while (await rows.ReadAsync(cancellationToken))
    {
        // Ordinals follow the column order of the SELECT list above.
        var attachment = new PdfAttachment
        {
            Id = rows.GetInt64(0),
            InstrumentId = rows.GetInt64(1),
            Kind = GetString(rows, 2),
            Title = GetString(rows, 3),
            SourceUrl = GetString(rows, 4),
            LocalPath = GetString(rows, 5),
            IsManual = rows.GetBoolean(6),
            CreatedAt = rows.GetDateTime(7)
        };
        attachments.Add(attachment);
    }

    return attachments;
}
/// <summary>
/// Builds a command for <paramref name="sql"/> with one parameter per instrument column.
/// Blank optional texts become NULL; a blank name becomes "Без названия" and a blank
/// source system becomes "Manual". The @Id parameter is not added here.
/// </summary>
/// <param name="sql">INSERT or UPDATE statement that uses the instrument parameters.</param>
/// <param name="connection">Connection the command will run on.</param>
/// <param name="record">Source of the parameter values.</param>
/// <returns>The prepared command; the caller disposes it.</returns>
private SqlCommand CreateRecordCommand(string sql, SqlConnection connection, InstrumentRecord record)
{
    var command = new SqlCommand(sql, connection);
    command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;
    var parameters = command.Parameters;

    // Optional text columns: trimmed value, or NULL when blank.
    void AddText(string parameterName, string value)
    {
        parameters.AddWithValue(parameterName, ToDbValue(value));
    }

    AddText("@RegistryNumber", record.RegistryNumber);

    var name = string.IsNullOrWhiteSpace(record.Name) ? "Без названия" : record.Name.Trim();
    parameters.AddWithValue("@Name", name);

    AddText("@TypeDesignation", record.TypeDesignation);
    AddText("@Manufacturer", record.Manufacturer);
    AddText("@VerificationInterval", record.VerificationInterval);
    AddText("@CertificateOrSerialNumber", record.CertificateOrSerialNumber);
    AddText("@AllowsBatchVerification", record.AllowsBatchVerification);
    AddText("@HasPeriodicVerification", record.HasPeriodicVerification);
    AddText("@TypeInfo", record.TypeInfo);
    AddText("@Purpose", record.Purpose);
    AddText("@Description", record.Description);
    AddText("@Software", record.Software);
    AddText("@MetrologicalCharacteristics", record.MetrologicalCharacteristics);
    AddText("@Completeness", record.Completeness);
    AddText("@Verification", record.Verification);
    AddText("@RegulatoryDocuments", record.RegulatoryDocuments);
    AddText("@Applicant", record.Applicant);
    AddText("@TestCenter", record.TestCenter);
    AddText("@DetailUrl", record.DetailUrl);

    var sourceSystem = string.IsNullOrWhiteSpace(record.SourceSystem) ? "Manual" : record.SourceSystem.Trim();
    parameters.AddWithValue("@SourceSystem", sourceSystem);

    object lastImportedAt = DBNull.Value;
    if (record.LastImportedAt.HasValue)
    {
        lastImportedAt = record.LastImportedAt.Value;
    }
    parameters.AddWithValue("@LastImportedAt", lastImportedAt);

    return command;
}
/// <summary>
/// Builds a command for <paramref name="sql"/> with one parameter per attachment column.
/// A blank kind becomes "PDF"; blank title, source URL and local path become NULL.
/// The @Id parameter is not added here.
/// </summary>
/// <param name="sql">INSERT or UPDATE statement that uses the attachment parameters.</param>
/// <param name="connection">Connection the command will run on.</param>
/// <param name="attachment">Source of the parameter values.</param>
/// <returns>The prepared command; the caller disposes it.</returns>
private SqlCommand CreateAttachmentCommand(string sql, SqlConnection connection, PdfAttachment attachment)
{
    var command = new SqlCommand(sql, connection);
    command.CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds;

    var kind = string.IsNullOrWhiteSpace(attachment.Kind) ? "PDF" : attachment.Kind.Trim();

    var parameters = command.Parameters;
    parameters.AddWithValue("@InstrumentId", attachment.InstrumentId);
    parameters.AddWithValue("@Kind", kind);
    parameters.AddWithValue("@Title", ToDbValue(attachment.Title));
    parameters.AddWithValue("@SourceUrl", ToDbValue(attachment.SourceUrl));
    parameters.AddWithValue("@LocalPath", ToDbValue(attachment.LocalPath));
    parameters.AddWithValue("@IsManual", attachment.IsManual);
    return command;
}
/// <summary>
/// Converts optional text to a parameter value: <see cref="DBNull.Value"/> for null,
/// empty or whitespace input, otherwise the trimmed text.
/// </summary>
private static object ToDbValue(string value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return DBNull.Value;
    }

    return value.Trim();
}
/// <summary>
/// Reads a string column, returning null when the column holds a database NULL.
/// </summary>
private static string GetString(SqlDataReader reader, int index)
{
    if (reader.IsDBNull(index))
    {
        return null;
    }

    return reader.GetString(index);
}
}

View File

@@ -0,0 +1,187 @@
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using CRAWLER.Configuration;
using Microsoft.Extensions.Configuration;
namespace CRAWLER.Services;
internal sealed class KtoPoveritClient : IDisposable
{
private readonly CrawlerOptions _options;
private readonly HttpClient _httpClient;

/// <summary>
/// Reads the "Crawler" configuration section and creates the HTTP client:
/// automatic decompression on, automatic redirects off (they are followed manually in SendAsync),
/// a timeout of at least 5 seconds, and the configured User-Agent.
/// </summary>
/// <param name="configuration">Application configuration containing the "Crawler" section.</param>
/// <exception cref="InvalidOperationException">The "Crawler" section is missing.</exception>
public KtoPoveritClient(IConfiguration configuration)
{
    var options = configuration.GetSection("Crawler").Get<CrawlerOptions>();
    if (options == null)
    {
        throw new InvalidOperationException("Раздел Crawler не найден в appsettings.json.");
    }

    _options = options;

    var handler = new SocketsHttpHandler();
    handler.AutomaticDecompression = DecompressionMethods.All;
    handler.AllowAutoRedirect = false;

    _httpClient = new HttpClient(handler);
    _httpClient.Timeout = TimeSpan.FromSeconds(Math.Max(5, _options.TimeoutSeconds));

    var defaultHeaders = _httpClient.DefaultRequestHeaders;
    defaultHeaders.UserAgent.ParseAdd(_options.UserAgent);
    defaultHeaders.AcceptLanguage.ParseAdd("ru-RU,ru;q=0.9,en-US;q=0.8");
}
/// <summary>
/// Crawler settings loaded from configuration in the constructor.
/// </summary>
public CrawlerOptions Options => _options;
/// <summary>
/// Downloads <paramref name="url"/> and returns the response body as text.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="cancellationToken">Token used for the request and for reading the body.</param>
public async Task<string> GetStringAsync(string url, CancellationToken cancellationToken)
{
    using (var request = CreateRequest(url))
    using (var response = await SendAsync(request, cancellationToken))
    {
        var text = await response.Content.ReadAsStringAsync(cancellationToken);
        return text;
    }
}
/// <summary>
/// Downloads <paramref name="url"/> and returns the response body as raw bytes.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="cancellationToken">Token used for the request and for reading the body.</param>
public async Task<byte[]> GetBytesAsync(string url, CancellationToken cancellationToken)
{
    using (var request = CreateRequest(url))
    using (var response = await SendAsync(request, cancellationToken))
    {
        var payload = await response.Content.ReadAsByteArrayAsync(cancellationToken);
        return payload;
    }
}
/// <summary>
/// Builds the absolute URL of a catalog page by inserting <paramref name="page"/>
/// into the configured path format.
/// </summary>
/// <param name="page">Page number placed into the format string.</param>
public string BuildCatalogPageUrl(int page)
    => BuildAbsoluteUrl(string.Format(_options.CatalogPathFormat, page));
/// <summary>
/// Turns a URL or site-relative path into an absolute URL.
/// </summary>
/// <param name="urlOrPath">Absolute URL, or a path relative to the configured base URL.</param>
/// <returns>
/// Null for blank input; the input itself (normalized by <see cref="Uri"/>) when it is already
/// absolute; otherwise the path resolved against the base URL.
/// </returns>
public string BuildAbsoluteUrl(string urlOrPath)
{
    if (string.IsNullOrWhiteSpace(urlOrPath))
    {
        return null;
    }

    var isAbsolute = Uri.TryCreate(urlOrPath, UriKind.Absolute, out var parsed);
    if (isAbsolute)
    {
        return parsed.ToString();
    }

    // Exactly one slash between base and path: strip them from both sides, then add one back.
    var root = new Uri(_options.BaseUrl.TrimEnd('/') + "/");
    var relativePath = urlOrPath.TrimStart('/');
    var combined = new Uri(root, relativePath);
    return combined.ToString();
}
/// <summary>
/// Creates a GET request for <paramref name="url"/> that asks for HTTP/1.1 (or lower).
/// </summary>
private HttpRequestMessage CreateRequest(string url)
{
    var message = new HttpRequestMessage(HttpMethod.Get, url);
    message.Version = HttpVersion.Version11;
    message.VersionPolicy = HttpVersionPolicy.RequestVersionOrLower;
    return message;
}
/// <summary>
/// Sends a GET request to the URI of <paramref name="request"/> and follows up to 10 redirects
/// manually (automatic redirects are disabled on the handler).
/// </summary>
/// <param name="request">Request whose <c>RequestUri</c> is the starting address.</param>
/// <param name="cancellationToken">Token that cancels the HTTP calls.</param>
/// <returns>The first response with a 2xx status code; the caller disposes it.</returns>
/// <exception cref="InvalidOperationException">
/// The request has no URI, the request failed, the status code was not successful,
/// a redirect had no usable target, or the redirect limit was exceeded.
/// </exception>
/// <exception cref="OperationCanceledException">Cancellation was requested through <paramref name="cancellationToken"/>.</exception>
private async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
{
    var currentUri = request.RequestUri ?? throw new InvalidOperationException("Не задан URL запроса.");
    const int maxRedirects = 10;
    try
    {
        for (var redirectIndex = 0; redirectIndex <= maxRedirects; redirectIndex++)
        {
            using var currentRequest = CreateRequest(currentUri.ToString());
            var response = await _httpClient.SendAsync(currentRequest, HttpCompletionOption.ResponseContentRead, cancellationToken);

            // Captured up front so nothing is read from the response after it is disposed.
            var statusCode = (int)response.StatusCode;

            if (IsRedirectStatusCode(response.StatusCode))
            {
                Uri redirectUri;
                using (response)
                {
                    // The using block releases the response even when resolving the target throws.
                    redirectUri = ResolveRedirectUri(currentUri, response.Headers);
                }

                if (redirectUri == null)
                {
                    throw new InvalidOperationException(
                        $"Сайт вернул {statusCode} для {currentUri}, но не прислал корректный адрес перенаправления.");
                }

                currentUri = redirectUri;
                continue;
            }

            if (statusCode >= 200 && statusCode <= 299)
            {
                return response;
            }

            var reasonPhrase = response.ReasonPhrase;
            response.Dispose();
            throw new HttpRequestException(
                $"Response status code does not indicate success: {statusCode} ({reasonPhrase}).");
        }

        throw new InvalidOperationException(
            $"Превышено число перенаправлений ({maxRedirects}) для {currentUri}.");
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // Caller-requested cancellation must surface as cancellation, not as a site failure.
        // Timeouts (token not cancelled) still fall through to the wrapper below.
        throw;
    }
    catch (Exception ex)
    {
        throw new InvalidOperationException(
            $"Не удалось получить данные с сайта Кто поверит: {request.RequestUri}. {ex.Message}",
            ex);
    }
}
/// <summary>
/// Returns <c>true</c> for the redirect statuses this client follows: 301, 302, 303, 307 and 308.
/// </summary>
private static bool IsRedirectStatusCode(HttpStatusCode statusCode) => statusCode switch
{
    HttpStatusCode.Moved
        or HttpStatusCode.Redirect
        or HttpStatusCode.RedirectMethod
        or HttpStatusCode.TemporaryRedirect
        or HttpStatusCode.PermanentRedirect => true,
    _ => false
};
/// <summary>
/// Determines the redirect target from the response headers.
/// </summary>
/// <param name="currentUri">URI of the request that produced the redirect; base for relative targets.</param>
/// <param name="headers">Response headers to read the <c>Location</c> value from.</param>
/// <returns>
/// The absolute redirect target, or <c>null</c> when there is no <c>Location</c> value
/// or it cannot be turned into a URI even after escaping.
/// </returns>
private static Uri ResolveRedirectUri(Uri currentUri, HttpResponseHeaders headers)
{
    // Preferred path: the typed header value.
    var typedLocation = headers.Location;
    if (typedLocation != null)
    {
        if (typedLocation.IsAbsoluteUri)
        {
            return typedLocation;
        }

        return new Uri(currentUri, typedLocation);
    }

    // Fallback: work from the raw header text when no typed value is available.
    if (!headers.TryGetValues("Location", out var headerValues))
    {
        return null;
    }

    var rawLocation = headerValues.FirstOrDefault();
    if (string.IsNullOrWhiteSpace(rawLocation))
    {
        return null;
    }

    // Try the text as-is first; escape it only if that fails.
    return Resolve(rawLocation) ?? Resolve(Uri.EscapeUriString(rawLocation));

    // Attempts an absolute parse, then a parse relative to the current URI.
    Uri Resolve(string candidate)
    {
        if (Uri.TryCreate(candidate, UriKind.Absolute, out var asAbsolute))
        {
            return asAbsolute;
        }

        return Uri.TryCreate(currentUri, candidate, out var asRelative)
            ? asRelative
            : null;
    }
}
/// <summary>
/// Disposes the underlying <c>_httpClient</c>.
/// </summary>
public void Dispose() => _httpClient.Dispose();
}

View File

@@ -0,0 +1,46 @@
using System.Diagnostics;
using System.IO;
using CRAWLER.Models;
namespace CRAWLER.Services;
/// <summary>
/// Abstraction for opening PDF attachments and arbitrary URIs/paths.
/// Implemented in this file by <see cref="PdfShellService"/>.
/// </summary>
internal interface IPdfOpener
{
/// <summary>
/// Opens the given attachment. The <see cref="PdfShellService"/> implementation ignores
/// <c>null</c>, prefers an existing local file and otherwise falls back to the source URL.
/// </summary>
void OpenAttachment(PdfAttachment attachment);
/// <summary>
/// Opens the given URI or file path. The <see cref="PdfShellService"/> implementation
/// ignores null/blank values and launches the rest through the operating-system shell.
/// </summary>
void OpenUri(string uri);
}
/// <summary>
/// <see cref="IPdfOpener"/> implementation that opens files and URLs through the
/// operating-system shell (<see cref="ProcessStartInfo.UseShellExecute"/> = <c>true</c>).
/// </summary>
internal sealed class PdfShellService : IPdfOpener
{
    /// <summary>
    /// Opens the attachment's local copy when it exists on disk, otherwise its source URL.
    /// Does nothing for a <c>null</c> attachment or when neither location is available.
    /// </summary>
    /// <param name="attachment">Attachment to open; may be <c>null</c>.</param>
    public void OpenAttachment(PdfAttachment attachment)
    {
        if (attachment == null)
        {
            return;
        }

        if (!string.IsNullOrWhiteSpace(attachment.LocalPath) && File.Exists(attachment.LocalPath))
        {
            OpenUri(attachment.LocalPath);
            return;
        }

        if (!string.IsNullOrWhiteSpace(attachment.SourceUrl))
        {
            // NOTE(review): SourceUrl is handed to the shell unvalidated. If it can originate from
            // crawled pages, consider restricting it to http/https before launching — confirm with callers.
            OpenUri(attachment.SourceUrl);
        }
    }

    /// <summary>
    /// Launches <paramref name="uri"/> (a file path or URL) with the shell's default handler.
    /// Null or blank values are ignored.
    /// </summary>
    /// <param name="uri">File path or URL to open.</param>
    public void OpenUri(string uri)
    {
        if (string.IsNullOrWhiteSpace(uri))
        {
            return;
        }

        var startInfo = new ProcessStartInfo(uri)
        {
            UseShellExecute = true
        };

        // Process is IDisposable and Start may return null when no new process was created.
        // Disposing only releases our handle; it does not close the launched application.
        Process.Start(startInfo)?.Dispose();
    }
}

View File

@@ -0,0 +1,91 @@
using System.IO;
using System.Linq;
using CRAWLER.Configuration;
using Microsoft.Extensions.Configuration;
namespace CRAWLER.Services;
/// <summary>
/// Stores PDF files under a configured root directory, one sub-folder per registry number,
/// and generates collision-free file names for them.
/// </summary>
internal sealed class PdfStorageService
{
    // Path.GetInvalidFileNameChars allocates a new array on every call; fetch it once.
    private static readonly char[] InvalidFileNameChars = Path.GetInvalidFileNameChars();

    private readonly KtoPoveritClient _client;
    private readonly string _rootPath;

    /// <summary>
    /// Reads the <c>Crawler</c> configuration section, expands environment variables in
    /// <c>PdfStoragePath</c> and makes sure that directory exists.
    /// </summary>
    /// <exception cref="InvalidOperationException">The <c>Crawler</c> section is missing.</exception>
    public PdfStorageService(IConfiguration configuration, KtoPoveritClient client)
    {
        _client = client;
        var options = configuration.GetSection("Crawler").Get<CrawlerOptions>()
            ?? throw new InvalidOperationException("Раздел Crawler не найден в appsettings.json.");
        _rootPath = Environment.ExpandEnvironmentVariables(options.PdfStoragePath);
        Directory.CreateDirectory(_rootPath);
    }

    /// <summary>
    /// Downloads <paramref name="sourceUrl"/> and saves it as a PDF in the folder of
    /// <paramref name="registryNumber"/>.
    /// </summary>
    /// <returns>Full path of the written file.</returns>
    public async Task<string> DownloadAsync(string sourceUrl, string registryNumber, string title, CancellationToken cancellationToken)
    {
        var bytes = await _client.GetBytesAsync(sourceUrl, cancellationToken);
        var fullPath = BuildTargetPath(registryNumber, title, sourceUrl);
        try
        {
            await File.WriteAllBytesAsync(fullPath, bytes, cancellationToken);
        }
        catch
        {
            // Do not leave a truncated file behind when the write fails or is cancelled.
            TryDelete(fullPath);
            throw;
        }

        return fullPath;
    }

    /// <summary>
    /// Copies the local file <paramref name="sourcePath"/> into the folder of
    /// <paramref name="registryNumber"/>.
    /// </summary>
    /// <returns>Full path of the copy.</returns>
    public async Task<string> CopyFromLocalAsync(string sourcePath, string registryNumber, string title, CancellationToken cancellationToken)
    {
        // Open the source first: if it is missing, nothing is created on the target side.
        await using var sourceStream = File.Open(sourcePath, FileMode.Open, FileAccess.Read, FileShare.Read);
        var fullPath = BuildTargetPath(registryNumber, title, sourcePath);
        try
        {
            await using var targetStream = File.Create(fullPath);
            await sourceStream.CopyToAsync(targetStream, cancellationToken);
        }
        catch
        {
            // The target stream is already closed here; remove the partial copy.
            TryDelete(fullPath);
            throw;
        }

        return fullPath;
    }

    /// <summary>
    /// Best-effort deletion: removes the file if it exists and never throws.
    /// </summary>
    public void TryDelete(string path)
    {
        try
        {
            if (!string.IsNullOrWhiteSpace(path) && File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch (Exception)
        {
            // Intentionally ignored: this method is used for cleanup and must not mask
            // the caller's original error or fail an otherwise successful operation.
        }
    }

    /// <summary>
    /// Builds a not-yet-existing <c>.pdf</c> path inside the registry-number folder
    /// (folder <c>manual</c> when no registry number is given), creating the folder if needed.
    /// The file name comes from <paramref name="title"/>, or from the file name part of
    /// <paramref name="sourceIdentity"/> when the title is blank; on a name clash
    /// a <c>-2</c>, <c>-3</c>, … suffix is appended.
    /// </summary>
    private string BuildTargetPath(string registryNumber, string title, string sourceIdentity)
    {
        var safeFolder = MakeSafePathSegment(string.IsNullOrWhiteSpace(registryNumber) ? "manual" : registryNumber);
        var folder = Path.Combine(_rootPath, safeFolder);
        Directory.CreateDirectory(folder);

        // MakeSafePathSegment never returns a blank value (it falls back to "file"),
        // so no additional default is needed here.
        var baseName = MakeSafePathSegment(string.IsNullOrWhiteSpace(title) ? Path.GetFileNameWithoutExtension(sourceIdentity) : title);

        var candidate = Path.Combine(folder, baseName + ".pdf");
        for (var counter = 2; File.Exists(candidate); counter++)
        {
            candidate = Path.Combine(folder, $"{baseName}-{counter}.pdf");
        }

        return candidate;
    }

    /// <summary>
    /// Turns arbitrary text into a single safe file-system name: invalid file-name characters
    /// become <c>_</c>, surrounding whitespace and trailing dots are removed, and an empty
    /// result is replaced by <c>file</c>.
    /// </summary>
    private static string MakeSafePathSegment(string value)
    {
        var replaced = (value ?? string.Empty)
            .Select(ch => Array.IndexOf(InvalidFileNameChars, ch) >= 0 ? '_' : ch)
            .ToArray();

        // "." and ".." contain no invalid characters but are directory references; as a folder
        // name they would place files outside the storage root. Stripping trailing dots and
        // spaces neutralizes them.
        var cleaned = new string(replaced).Trim().TrimEnd('.', ' ');
        return string.IsNullOrWhiteSpace(cleaned) ? "file" : cleaned;
    }
}

View File

@@ -0,0 +1,55 @@
using CRAWLER.Configuration;
using Microsoft.Data.SqlClient;
using Microsoft.Extensions.Configuration;
namespace CRAWLER.Services;
/// <summary>
/// Factory for SQL Server connections plus the database settings they are built from.
/// Implemented in this file by <see cref="SqlServerConnectionFactory"/>.
/// </summary>
internal interface IDatabaseConnectionFactory
{
/// <summary>
/// Creates a new connection; <see cref="SqlServerConnectionFactory"/> targets the database
/// named in <see cref="Options"/>. The connection is returned unopened.
/// </summary>
SqlConnection CreateConnection();
/// <summary>
/// Creates a new connection; <see cref="SqlServerConnectionFactory"/> targets the
/// <c>master</c> database. The connection is returned unopened.
/// </summary>
SqlConnection CreateMasterConnection();
/// <summary>
/// Database settings used to build the connection strings.
/// </summary>
DatabaseOptions Options { get; }
}
/// <summary>
/// <see cref="IDatabaseConnectionFactory"/> that builds SQL Server connections from the
/// <c>Database</c> configuration section.
/// </summary>
internal sealed class SqlServerConnectionFactory : IDatabaseConnectionFactory
{
    /// <summary>
    /// Binds the <c>Database</c> section to <see cref="DatabaseOptions"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">The section could not be bound.</exception>
    public SqlServerConnectionFactory(IConfiguration configuration)
    {
        var bound = configuration.GetSection("Database").Get<DatabaseOptions>();
        if (bound is null)
        {
            throw new InvalidOperationException("Раздел Database не найден в appsettings.json.");
        }

        Options = bound;
    }

    /// <summary>Settings every connection string is derived from.</summary>
    public DatabaseOptions Options { get; }

    /// <summary>Creates an unopened connection to the configured application database.</summary>
    public SqlConnection CreateConnection() => new SqlConnection(BuildConnectionString(Options.Database));

    /// <summary>Creates an unopened connection to the <c>master</c> database.</summary>
    public SqlConnection CreateMasterConnection() => new SqlConnection(BuildConnectionString("master"));

    /// <summary>
    /// Composes a connection string for <paramref name="databaseName"/> from <see cref="Options"/>.
    /// </summary>
    private string BuildConnectionString(string databaseName)
    {
        var settings = Options;
        var builder = new SqlConnectionStringBuilder();

        builder.ApplicationName = settings.ApplicationName;
        builder.DataSource = settings.Server;
        builder.InitialCatalog = databaseName;
        builder.ConnectTimeout = settings.ConnectTimeoutSeconds;
        builder.Encrypt = settings.Encrypt;
        builder.IntegratedSecurity = settings.IntegratedSecurity;
        builder.MultipleActiveResultSets = settings.MultipleActiveResultSets;
        builder.Pooling = settings.Pooling;
        builder.MaxPoolSize = settings.MaxPoolSize;
        builder.MinPoolSize = settings.MinPoolSize;
        builder.TrustServerCertificate = settings.TrustServerCertificate;
        builder.ConnectRetryCount = settings.ConnectRetryCount;
        builder.ConnectRetryInterval = settings.ConnectRetryIntervalSeconds;

        return builder.ConnectionString;
    }
}