diff --git a/App.xaml b/App.xaml new file mode 100644 index 0000000..c3ddbd2 --- /dev/null +++ b/App.xaml @@ -0,0 +1,172 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/App.xaml.cs b/App.xaml.cs new file mode 100644 index 0000000..d7cc151 --- /dev/null +++ b/App.xaml.cs @@ -0,0 +1,153 @@ +using System; +using System.Globalization; +using System.Threading; +using System.Threading.Tasks; +using System.Windows; +using System.Windows.Markup; +using System.Windows.Threading; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; + +namespace CRAWLER; + +public partial class App : Application +{ + private IHost _host; + + public App() + { + ApplyRussianCulture(); + RegisterGlobalExceptionHandlers(); + } + + protected override async void OnStartup(StartupEventArgs e) + { + base.OnStartup(e); + + try + { + _host = AppHost.Create(); + await _host.StartAsync().ConfigureAwait(true); + + MainWindow = _host.Services.GetRequiredService(); + MainWindow.Show(); + } + catch (Exception ex) + { + MessageBox.Show(ex.Message, "CRAWLER", MessageBoxButton.OK, MessageBoxImage.Error); + Shutdown(-1); + } + } + + protected override async void OnExit(ExitEventArgs e) + { + try + { + if (_host != null) + { + try + { + await _host.StopAsync(TimeSpan.FromSeconds(5)).ConfigureAwait(true); + } + finally + { + _host.Dispose(); + } + } + } + catch (Exception ex) + { + ShowUnhandledException(ex, true); + } + finally + { + UnregisterGlobalExceptionHandlers(); + base.OnExit(e); + } + } + + private static void ApplyRussianCulture() + { + var culture = new CultureInfo("ru-RU"); + + CultureInfo.DefaultThreadCurrentCulture = culture; + CultureInfo.DefaultThreadCurrentUICulture = culture; + Thread.CurrentThread.CurrentCulture = culture; + Thread.CurrentThread.CurrentUICulture = culture; + + FrameworkElement.LanguageProperty.OverrideMetadata( + typeof(FrameworkElement), + new 
FrameworkPropertyMetadata(XmlLanguage.GetLanguage(culture.IetfLanguageTag))); + } + + private void RegisterGlobalExceptionHandlers() + { + DispatcherUnhandledException += OnDispatcherUnhandledException; + AppDomain.CurrentDomain.UnhandledException += OnCurrentDomainUnhandledException; + TaskScheduler.UnobservedTaskException += OnUnobservedTaskException; + } + + private void UnregisterGlobalExceptionHandlers() + { + DispatcherUnhandledException -= OnDispatcherUnhandledException; + AppDomain.CurrentDomain.UnhandledException -= OnCurrentDomainUnhandledException; + TaskScheduler.UnobservedTaskException -= OnUnobservedTaskException; + } + + private void OnDispatcherUnhandledException(object sender, DispatcherUnhandledExceptionEventArgs e) + { + ShowUnhandledException(e.Exception, false); + e.Handled = true; + } + + private void OnCurrentDomainUnhandledException(object sender, UnhandledExceptionEventArgs e) + { + if (e.ExceptionObject is Exception exception) + { + ShowUnhandledException(exception, e.IsTerminating); + return; + } + + MessageBox.Show( + e.ExceptionObject == null ? "Произошла необработанная ошибка." : e.ExceptionObject.ToString(), + e.IsTerminating ? "CRAWLER - критическая ошибка" : "CRAWLER", + MessageBoxButton.OK, + MessageBoxImage.Error); + } + + private void OnUnobservedTaskException(object sender, UnobservedTaskExceptionEventArgs e) + { + ShowUnhandledException(e.Exception, false); + e.SetObserved(); + } + + private static void ShowUnhandledException(Exception exception, bool isCritical) + { + var actualException = UnwrapException(exception); + var message = string.IsNullOrWhiteSpace(actualException.Message) + ? actualException.ToString() + : actualException.Message; + + MessageBox.Show( + message, + isCritical ? 
"CRAWLER - критическая ошибка" : "CRAWLER", + MessageBoxButton.OK, + MessageBoxImage.Error); + } + + private static Exception UnwrapException(Exception exception) + { + if (exception is AggregateException aggregateException) + { + var flattened = aggregateException.Flatten(); + if (flattened.InnerExceptions.Count == 1) + { + return UnwrapException(flattened.InnerExceptions[0]); + } + + return flattened; + } + + return exception; + } +} diff --git a/AppHost.cs b/AppHost.cs new file mode 100644 index 0000000..8b23428 --- /dev/null +++ b/AppHost.cs @@ -0,0 +1,47 @@ +using System; +using CRAWLER.Parsing; +using CRAWLER.Services; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; + +namespace CRAWLER; + +internal static class AppHost +{ + public static IHost Create() + { + return Host.CreateDefaultBuilder() + .UseContentRoot(AppContext.BaseDirectory) + .ConfigureAppConfiguration((_, config) => + { + config.Sources.Clear(); + config.SetBasePath(AppContext.BaseDirectory); + config.AddJsonFile("appsettings.json", optional: false, reloadOnChange: true); + config.AddEnvironmentVariables(); + }) + .ConfigureServices((context, services) => + { + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + services.AddTransient(provider => new MainWindow( + provider.GetRequiredService(), + provider.GetRequiredService(), + provider.GetRequiredService())); + }) + .UseDefaultServiceProvider((_, options) => + { + options.ValidateOnBuild = true; + options.ValidateScopes = true; + }) + .Build(); + } +} diff --git a/AssemblyInfo.cs b/AssemblyInfo.cs new file mode 100644 index 0000000..cc29e7f --- /dev/null +++ b/AssemblyInfo.cs @@ -0,0 +1,10 @@ +using System.Windows; + +[assembly:ThemeInfo( + 
ResourceDictionaryLocation.None, //where theme specific resource dictionaries are located + //(used if a resource is not found in the page, + // or application resource dictionaries) + ResourceDictionaryLocation.SourceAssembly //where the generic resource dictionary is located + //(used if a resource is not found in the page, + // app, or any theme specific resource dictionaries) +)] diff --git a/CRAWLER.csproj b/CRAWLER.csproj new file mode 100644 index 0000000..9aacd80 --- /dev/null +++ b/CRAWLER.csproj @@ -0,0 +1,37 @@ + + + + WinExe + net10.0-windows + disable + enable + true + + + + + + + + + + + + + + + + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + + diff --git a/CRAWLER.sln b/CRAWLER.sln new file mode 100644 index 0000000..c2e5296 --- /dev/null +++ b/CRAWLER.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 18 +VisualStudioVersion = 18.4.11626.88 stable +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CRAWLER", "CRAWLER.csproj", "{99A6FD50-529F-431C-8F74-7F37BBA2948E}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {99A6FD50-529F-431C-8F74-7F37BBA2948E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {99A6FD50-529F-431C-8F74-7F37BBA2948E}.Debug|Any CPU.Build.0 = Debug|Any CPU + {99A6FD50-529F-431C-8F74-7F37BBA2948E}.Release|Any CPU.ActiveCfg = Release|Any CPU + {99A6FD50-529F-431C-8F74-7F37BBA2948E}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {607CB39E-759D-4950-B3DE-F3008151509A} + EndGlobalSection +EndGlobal diff --git a/Configuration/CrawlerOptions.cs 
b/Configuration/CrawlerOptions.cs new file mode 100644 index 0000000..39f17e0 --- /dev/null +++ b/Configuration/CrawlerOptions.cs @@ -0,0 +1,12 @@ +namespace CRAWLER.Configuration; + +internal sealed class CrawlerOptions +{ + public string BaseUrl { get; set; } = "https://www.ktopoverit.ru"; + public string CatalogPathFormat { get; set; } = "/poverka/gosreestr_sredstv_izmereniy?page={0}"; + public int RequestDelayMilliseconds { get; set; } = 350; + public int DefaultPagesToScan { get; set; } = 1; + public string PdfStoragePath { get; set; } = "%LOCALAPPDATA%\\CRAWLER\\PdfStore"; + public int TimeoutSeconds { get; set; } = 30; + public string UserAgent { get; set; } = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) CRAWLER/1.0"; +} diff --git a/Configuration/DatabaseOptions.cs b/Configuration/DatabaseOptions.cs new file mode 100644 index 0000000..ab69dd9 --- /dev/null +++ b/Configuration/DatabaseOptions.cs @@ -0,0 +1,19 @@ +namespace CRAWLER.Configuration; + +internal sealed class DatabaseOptions +{ + public string ApplicationName { get; set; } = "CRAWLER"; + public int CommandTimeoutSeconds { get; set; } = 60; + public int ConnectRetryCount { get; set; } = 3; + public int ConnectRetryIntervalSeconds { get; set; } = 5; + public int ConnectTimeoutSeconds { get; set; } = 15; + public string Database { get; set; } = "CRAWLER"; + public bool Encrypt { get; set; } + public bool IntegratedSecurity { get; set; } = true; + public bool MultipleActiveResultSets { get; set; } = true; + public bool Pooling { get; set; } = true; + public int MaxPoolSize { get; set; } = 100; + public int MinPoolSize { get; set; } + public string Server { get; set; } = @".\SQLEXPRESS"; + public bool TrustServerCertificate { get; set; } = true; +} diff --git a/Dialogs/EditInstrumentWindow.xaml b/Dialogs/EditInstrumentWindow.xaml new file mode 100644 index 0000000..3b489ec --- /dev/null +++ b/Dialogs/EditInstrumentWindow.xaml @@ -0,0 +1,175 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Dialogs/EditInstrumentWindow.xaml.cs b/Dialogs/EditInstrumentWindow.xaml.cs new file mode 100644 index 0000000..71528f6 --- /dev/null +++ b/Dialogs/EditInstrumentWindow.xaml.cs @@ -0,0 +1,46 @@ +using System.Windows; +using CRAWLER.Services; +using CRAWLER.ViewModels; + +namespace CRAWLER.Dialogs; + +public partial class EditInstrumentWindow : Window +{ + private readonly IFilePickerService _filePickerService; + + internal EditInstrumentWindow(EditInstrumentWindowViewModel viewModel, IFilePickerService filePickerService) + { + InitializeComponent(); + ViewModel = viewModel; + _filePickerService = filePickerService; + DataContext = ViewModel; + } + + internal EditInstrumentWindowViewModel ViewModel { get; } + + private void BrowsePdfButton_Click(object sender, RoutedEventArgs e) + { + ViewModel.AddPendingFiles(_filePickerService.PickPdfFiles(true)); + } + + private void RemovePendingPdfButton_Click(object sender, RoutedEventArgs e) + { + ViewModel.RemovePendingSelected(); + } + + private void SaveButton_Click(object sender, RoutedEventArgs e) + { + if (!ViewModel.Validate(out var errorMessage)) + { + MessageBox.Show(errorMessage, "CRAWLER", MessageBoxButton.OK, MessageBoxImage.Warning); + return; + } + + DialogResult = true; + } + + private void CancelButton_Click(object sender, RoutedEventArgs e) + { + DialogResult = false; + } +} diff --git a/Infrastructure/MvvmInfrastructure.cs b/Infrastructure/MvvmInfrastructure.cs new file mode 100644 index 0000000..caeb6e6 --- /dev/null +++ b/Infrastructure/MvvmInfrastructure.cs @@ -0,0 +1,126 @@ +using System; +using System.Collections.Generic; +using System.ComponentModel; +using System.Runtime.CompilerServices; +using System.Threading.Tasks; +using System.Windows.Input; + +namespace 
CRAWLER.Infrastructure; + +public abstract class ObservableObject : INotifyPropertyChanged +{ + public event PropertyChangedEventHandler PropertyChanged; + + protected bool SetProperty(ref T field, T value, [CallerMemberName] string propertyName = null) + { + if (EqualityComparer.Default.Equals(field, value)) + { + return false; + } + + field = value; + OnPropertyChanged(propertyName); + return true; + } + + protected void OnPropertyChanged([CallerMemberName] string propertyName = null) + { + var handler = PropertyChanged; + if (handler != null) + { + handler(this, new PropertyChangedEventArgs(propertyName)); + } + } +} + +internal sealed class RelayCommand : ICommand +{ + private readonly Action _execute; + private readonly Predicate _canExecute; + + public RelayCommand(Action execute, Predicate canExecute = null) + { + _execute = execute ?? throw new ArgumentNullException(nameof(execute)); + _canExecute = canExecute; + } + + public event EventHandler CanExecuteChanged; + + public bool CanExecute(object parameter) + { + return _canExecute == null || _canExecute(parameter); + } + + public void Execute(object parameter) + { + _execute(parameter); + } + + public void RaiseCanExecuteChanged() + { + var handler = CanExecuteChanged; + if (handler != null) + { + handler(this, EventArgs.Empty); + } + } +} + +internal sealed class AsyncRelayCommand : ICommand +{ + private readonly Func _executeAsync; + private readonly Predicate _canExecute; + private bool _isExecuting; + + public AsyncRelayCommand(Func executeAsync, Func canExecute = null) + : this(_ => executeAsync(), canExecute == null ? null : new Predicate(_ => canExecute())) + { + } + + public AsyncRelayCommand(Func executeAsync, Predicate canExecute = null) + { + _executeAsync = executeAsync ?? 
throw new ArgumentNullException(nameof(executeAsync)); + _canExecute = canExecute; + } + + public event EventHandler CanExecuteChanged; + + public bool CanExecute(object parameter) + { + return !_isExecuting && (_canExecute == null || _canExecute(parameter)); + } + + public async void Execute(object parameter) + { + await ExecuteAsync(parameter); + } + + public async Task ExecuteAsync(object parameter = null) + { + if (!CanExecute(parameter)) + { + return; + } + + try + { + _isExecuting = true; + RaiseCanExecuteChanged(); + await _executeAsync(parameter); + } + finally + { + _isExecuting = false; + RaiseCanExecuteChanged(); + } + } + + public void RaiseCanExecuteChanged() + { + var handler = CanExecuteChanged; + if (handler != null) + { + handler(this, EventArgs.Empty); + } + } +} diff --git a/MainWindow.xaml b/MainWindow.xaml new file mode 100644 index 0000000..2dab736 --- /dev/null +++ b/MainWindow.xaml @@ -0,0 +1,268 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/MainWindow.xaml.cs b/MainWindow.xaml.cs new file mode 100644 index 0000000..b06073e --- /dev/null +++ b/MainWindow.xaml.cs @@ -0,0 +1,188 @@ +using System; +using System.Threading.Tasks; +using System.Windows; +using System.Windows.Input; +using CRAWLER.Models; +using CRAWLER.Services; +using CRAWLER.ViewModels; + +namespace CRAWLER; + +public partial class MainWindow : Window +{ + private readonly IFilePickerService _filePickerService; + private readonly MainWindowViewModel _viewModel; + + internal MainWindow( + InstrumentCatalogService catalogService, + IPdfOpener pdfOpener, + IFilePickerService filePickerService) + { + InitializeComponent(); + _filePickerService = 
filePickerService; + _viewModel = new MainWindowViewModel(catalogService, pdfOpener); + DataContext = _viewModel; + } + + private async void Window_Loaded(object sender, RoutedEventArgs e) + { + await ExecuteUiAsync(_viewModel.InitializeAsync); + } + + private async void RefreshButton_Click(object sender, RoutedEventArgs e) + { + await ExecuteUiAsync(() => _viewModel.RefreshAsync()); + } + + private async void SyncButton_Click(object sender, RoutedEventArgs e) + { + await ExecuteUiAsync(async () => + { + var result = await _viewModel.SyncAsync(); + MessageBox.Show( + $"Обработано страниц: {result.PagesScanned}\n" + + $"Обработано записей: {result.ProcessedItems}\n" + + $"Добавлено: {result.AddedRecords}\n" + + $"Обновлено: {result.UpdatedRecords}\n" + + $"Пропущено страниц: {result.FailedPages}\n" + + $"Пропущено записей: {result.FailedItems}\n" + + $"Карточек без деталей: {result.SkippedDetailRequests}\n" + + $"Скачано PDF: {result.DownloadedPdfFiles}\n" + + $"Ошибок PDF: {result.FailedPdfFiles}", + "CRAWLER", + MessageBoxButton.OK, + MessageBoxImage.Information); + }); + } + + private async void AddButton_Click(object sender, RoutedEventArgs e) + { + var dialog = new Dialogs.EditInstrumentWindow( + new EditInstrumentWindowViewModel(_viewModel.CreateNewDraft(), true), + _filePickerService) + { + Owner = this + }; + + if (dialog.ShowDialog() == true) + { + await ExecuteUiAsync(() => _viewModel.SaveAsync(dialog.ViewModel.Draft, dialog.ViewModel.GetPendingPaths())); + } + } + + private async void EditButton_Click(object sender, RoutedEventArgs e) + { + if (_viewModel.SelectedInstrument == null) + { + return; + } + + var dialog = new Dialogs.EditInstrumentWindow( + new EditInstrumentWindowViewModel(_viewModel.CreateDraftFromSelected(), false), + _filePickerService) + { + Owner = this + }; + + if (dialog.ShowDialog() == true) + { + await ExecuteUiAsync(() => _viewModel.SaveAsync(dialog.ViewModel.Draft, dialog.ViewModel.GetPendingPaths())); + } + } + + private async 
void DeleteButton_Click(object sender, RoutedEventArgs e) + { + if (_viewModel.SelectedInstrument == null) + { + return; + } + + var answer = MessageBox.Show( + $"Удалить запись \"{_viewModel.SelectedInstrument.Name}\"?", + "CRAWLER", + MessageBoxButton.YesNo, + MessageBoxImage.Warning); + + if (answer == MessageBoxResult.Yes) + { + await ExecuteUiAsync(_viewModel.DeleteSelectedAsync); + } + } + + private async void AddPdfButton_Click(object sender, RoutedEventArgs e) + { + if (_viewModel.SelectedInstrument == null) + { + return; + } + + var paths = _filePickerService.PickPdfFiles(true); + if (paths.Count == 0) + { + return; + } + + await ExecuteUiAsync(() => _viewModel.AddAttachmentsToSelectedAsync(paths)); + } + + private void OpenAttachmentButton_Click(object sender, RoutedEventArgs e) + { + _viewModel.OpenAttachment(AttachmentGrid.SelectedItem as PdfAttachment); + } + + private async void RemoveAttachmentButton_Click(object sender, RoutedEventArgs e) + { + var attachment = AttachmentGrid.SelectedItem as PdfAttachment; + if (attachment == null) + { + return; + } + + var answer = MessageBox.Show( + $"Удалить привязку к PDF \"{attachment.DisplayName}\"?", + "CRAWLER", + MessageBoxButton.YesNo, + MessageBoxImage.Warning); + + if (answer == MessageBoxResult.Yes) + { + await ExecuteUiAsync(() => _viewModel.RemoveAttachmentAsync(attachment)); + } + } + + private void OpenSourceButton_Click(object sender, RoutedEventArgs e) + { + _viewModel.OpenSourceUrl(); + } + + private async void InstrumentGrid_MouseDoubleClick(object sender, MouseButtonEventArgs e) + { + if (_viewModel.SelectedInstrument != null) + { + EditButton_Click(sender, e); + } + + await Task.CompletedTask; + } + + private async void SearchTextBox_KeyDown(object sender, KeyEventArgs e) + { + if (e.Key == Key.Enter) + { + e.Handled = true; + await ExecuteUiAsync(() => _viewModel.RefreshAsync()); + } + } + + private async Task ExecuteUiAsync(Func action) + { + try + { + await action(); + } + catch (Exception 
ex) + { + MessageBox.Show(ex.Message, "CRAWLER", MessageBoxButton.OK, MessageBoxImage.Error); + } + } +} diff --git a/Models/InstrumentModels.cs b/Models/InstrumentModels.cs new file mode 100644 index 0000000..75cca0f --- /dev/null +++ b/Models/InstrumentModels.cs @@ -0,0 +1,150 @@ +using System; +using System.Collections.Generic; +using System.IO; + +namespace CRAWLER.Models; + +internal sealed class InstrumentSummary +{ + public long Id { get; set; } + public string RegistryNumber { get; set; } + public string Name { get; set; } + public string TypeDesignation { get; set; } + public string Manufacturer { get; set; } + public string VerificationInterval { get; set; } + public string SourceSystem { get; set; } + public DateTime UpdatedAt { get; set; } +} + +internal sealed class InstrumentRecord +{ + public long Id { get; set; } + public string RegistryNumber { get; set; } + public string Name { get; set; } + public string TypeDesignation { get; set; } + public string Manufacturer { get; set; } + public string VerificationInterval { get; set; } + public string CertificateOrSerialNumber { get; set; } + public string AllowsBatchVerification { get; set; } + public string HasPeriodicVerification { get; set; } + public string TypeInfo { get; set; } + public string Purpose { get; set; } + public string Description { get; set; } + public string Software { get; set; } + public string MetrologicalCharacteristics { get; set; } + public string Completeness { get; set; } + public string Verification { get; set; } + public string RegulatoryDocuments { get; set; } + public string Applicant { get; set; } + public string TestCenter { get; set; } + public string DetailUrl { get; set; } + public string SourceSystem { get; set; } = "Manual"; + public DateTime? 
LastImportedAt { get; set; } + public DateTime CreatedAt { get; set; } + public DateTime UpdatedAt { get; set; } + public List Attachments { get; set; } = new List(); + + public InstrumentRecord Clone() + { + var copy = (InstrumentRecord)MemberwiseClone(); + copy.Attachments = new List(); + + foreach (var attachment in Attachments) + { + copy.Attachments.Add(attachment.Clone()); + } + + return copy; + } +} + +internal sealed class PdfAttachment +{ + public long Id { get; set; } + public long InstrumentId { get; set; } + public string Kind { get; set; } + public string Title { get; set; } + public string SourceUrl { get; set; } + public string LocalPath { get; set; } + public bool IsManual { get; set; } + public DateTime CreatedAt { get; set; } + + public string DisplayName + { + get + { + if (!string.IsNullOrWhiteSpace(Title)) + { + return Title; + } + + if (!string.IsNullOrWhiteSpace(LocalPath)) + { + return Path.GetFileName(LocalPath); + } + + return SourceUrl ?? string.Empty; + } + } + + public PdfAttachment Clone() + { + return (PdfAttachment)MemberwiseClone(); + } +} + +internal sealed class PendingPdfFile +{ + public string SourcePath { get; set; } + public string DisplayName { get; set; } +} + +internal sealed class CatalogListItem +{ + public string RegistryNumber { get; set; } + public string Name { get; set; } + public string TypeDesignation { get; set; } + public string Manufacturer { get; set; } + public string VerificationInterval { get; set; } + public string CertificateOrSerialNumber { get; set; } + public string DetailUrl { get; set; } + public string DescriptionTypePdfUrl { get; set; } + public string MethodologyPdfUrl { get; set; } +} + +internal sealed class ParsedInstrumentDetails +{ + public string RegistryNumber { get; set; } + public string Name { get; set; } + public string TypeDesignation { get; set; } + public string Manufacturer { get; set; } + public string VerificationInterval { get; set; } + public string CertificateOrSerialNumber { 
get; set; } + public string AllowsBatchVerification { get; set; } + public string HasPeriodicVerification { get; set; } + public string TypeInfo { get; set; } + public string Purpose { get; set; } + public string Description { get; set; } + public string Software { get; set; } + public string MetrologicalCharacteristics { get; set; } + public string Completeness { get; set; } + public string Verification { get; set; } + public string RegulatoryDocuments { get; set; } + public string Applicant { get; set; } + public string TestCenter { get; set; } + public string DescriptionTypePdfUrl { get; set; } + public string MethodologyPdfUrl { get; set; } +} + +internal sealed class SyncResult +{ + public int PagesScanned { get; set; } + public int ProcessedItems { get; set; } + public int AddedRecords { get; set; } + public int UpdatedRecords { get; set; } + public int DownloadedPdfFiles { get; set; } + public int FailedPdfFiles { get; set; } + public int FailedPages { get; set; } + public int FailedItems { get; set; } + public int SkippedDetailRequests { get; set; } +} diff --git a/Parsing/CatalogPageParser.cs b/Parsing/CatalogPageParser.cs new file mode 100644 index 0000000..148e379 --- /dev/null +++ b/Parsing/CatalogPageParser.cs @@ -0,0 +1,86 @@ +using System; +using System.Collections.Generic; +using CRAWLER.Models; +using HtmlAgilityPack; + +namespace CRAWLER.Parsing; + +internal sealed class CatalogPageParser +{ + public IReadOnlyList Parse(string html, string baseUrl) + { + var document = new HtmlDocument(); + document.LoadHtml(html ?? 
string.Empty); + + var items = new List(); + var nodes = document.DocumentNode.SelectNodes("//div[contains(concat(' ', normalize-space(@class), ' '), ' sivreestre ')]"); + if (nodes == null) + { + return items; + } + + foreach (var node in nodes) + { + var item = new CatalogListItem + { + RegistryNumber = ReadBlockValue(node, "№ в госреестре"), + Name = ReadBlockValue(node, "Наименование"), + TypeDesignation = ReadBlockValue(node, "Тип"), + Manufacturer = ReadBlockValue(node, "Производитель"), + VerificationInterval = ReadBlockValue(node, "МПИ"), + CertificateOrSerialNumber = ReadBlockValue(node, "Cвидетельство завод. номер") + ?? ReadBlockValue(node, "Свидетельство завод. номер"), + DetailUrl = ReadDetailUrl(node, baseUrl), + DescriptionTypePdfUrl = ReadPdfUrl(node, "/prof/opisanie/", baseUrl), + MethodologyPdfUrl = ReadPdfUrl(node, "/prof/metodiki/", baseUrl) + }; + + if (!string.IsNullOrWhiteSpace(item.RegistryNumber) || !string.IsNullOrWhiteSpace(item.Name)) + { + items.Add(item); + } + } + + return items; + } + + private static string ReadBlockValue(HtmlNode root, string header) + { + var block = FindBlockByHeader(root, header); + return HtmlParsingHelpers.ExtractNodeTextExcludingChildParagraph(block); + } + + private static HtmlNode FindBlockByHeader(HtmlNode root, string header) + { + var blocks = root.SelectNodes("./div"); + if (blocks == null) + { + return null; + } + + foreach (var block in blocks) + { + var paragraph = block.SelectSingleNode("./p"); + var label = HtmlParsingHelpers.NormalizeLabel(paragraph?.InnerText); + if (string.Equals(label, header, StringComparison.OrdinalIgnoreCase)) + { + return block; + } + } + + return null; + } + + private static string ReadDetailUrl(HtmlNode root, string baseUrl) + { + var link = root.SelectSingleNode(".//div[contains(@class,'resulttable6')]/a[1]") + ?? 
root.SelectSingleNode(".//div[contains(@class,'resulttable4')]/a[1]"); + return HtmlParsingHelpers.MakeAbsoluteUrl(baseUrl, link?.GetAttributeValue("href", null)); + } + + private static string ReadPdfUrl(HtmlNode root, string marker, string baseUrl) + { + var link = root.SelectSingleNode($".//a[contains(@href,'{marker}')]"); + return HtmlParsingHelpers.MakeAbsoluteUrl(baseUrl, link?.GetAttributeValue("href", null)); + } +} diff --git a/Parsing/DetailPageParser.cs b/Parsing/DetailPageParser.cs new file mode 100644 index 0000000..4568c88 --- /dev/null +++ b/Parsing/DetailPageParser.cs @@ -0,0 +1,65 @@ +using System; +using System.Collections.Generic; +using CRAWLER.Models; +using HtmlAgilityPack; + +namespace CRAWLER.Parsing; + +internal sealed class DetailPageParser +{ + public ParsedInstrumentDetails Parse(string html, string baseUrl) + { + var document = new HtmlDocument(); + document.LoadHtml(html ?? string.Empty); + + var rows = document.DocumentNode.SelectNodes("//table[contains(@class,'resulttable1')]//tr"); + var values = new Dictionary(StringComparer.OrdinalIgnoreCase); + if (rows != null) + { + foreach (var row in rows) + { + var cells = row.SelectNodes("./td"); + if (cells == null || cells.Count < 2) + { + continue; + } + + var label = HtmlParsingHelpers.NormalizeLabel(cells[0].InnerText); + var value = HtmlParsingHelpers.NormalizeWhitespace(cells[1].InnerText); + if (!string.IsNullOrWhiteSpace(label)) + { + values[label] = value; + } + } + } + + return new ParsedInstrumentDetails + { + RegistryNumber = Get(values, "Номер в госреестре"), + Name = Get(values, "Наименование"), + TypeDesignation = Get(values, "Обозначение типа"), + Manufacturer = Get(values, "Производитель"), + VerificationInterval = Get(values, "Межповерочный интервал (МПИ)"), + CertificateOrSerialNumber = Get(values, "Срок свидетельства или заводской номер"), + AllowsBatchVerification = Get(values, "Допускается поверка партии"), + HasPeriodicVerification = Get(values, "Наличие 
периодической поверки"), + TypeInfo = Get(values, "Сведения о типе"), + Purpose = Get(values, "Назначение"), + Description = Get(values, "Описание"), + Software = Get(values, "Программное обеспечение"), + MetrologicalCharacteristics = Get(values, "Метрологические и технические характеристики"), + Completeness = Get(values, "Комплектность"), + Verification = Get(values, "Поверка"), + RegulatoryDocuments = Get(values, "Нормативные и технические документы"), + Applicant = Get(values, "Заявитель"), + TestCenter = Get(values, "Испытательный центр"), + DescriptionTypePdfUrl = HtmlParsingHelpers.MakeAbsoluteUrl(baseUrl, document.DocumentNode.SelectSingleNode("//a[contains(@href,'/prof/opisanie/')]")?.GetAttributeValue("href", null)), + MethodologyPdfUrl = HtmlParsingHelpers.MakeAbsoluteUrl(baseUrl, document.DocumentNode.SelectSingleNode("//a[contains(@href,'/prof/metodiki/')]")?.GetAttributeValue("href", null)) + }; + } + + private static string Get(IDictionary values, string key) + { + return values.TryGetValue(key, out var value) ? 
value : null; + } +} diff --git a/Parsing/HtmlParsingHelpers.cs b/Parsing/HtmlParsingHelpers.cs new file mode 100644 index 0000000..0a15700 --- /dev/null +++ b/Parsing/HtmlParsingHelpers.cs @@ -0,0 +1,64 @@ +using System; +using System.Text.RegularExpressions; +using HtmlAgilityPack; + +namespace CRAWLER.Parsing; + +internal static class HtmlParsingHelpers +{ + public static string NormalizeWhitespace(string value) + { + if (string.IsNullOrWhiteSpace(value)) + { + return null; + } + + var decoded = HtmlEntity.DeEntitize(value); + decoded = decoded.Replace('\u00A0', ' '); + decoded = Regex.Replace(decoded, @"\s+", " "); + return decoded.Trim(); + } + + public static string NormalizeLabel(string value) + { + return NormalizeWhitespace(value) + ?.Replace(" :", ":") + .Trim(':', ' '); + } + + public static string MakeAbsoluteUrl(string baseUrl, string urlOrPath) + { + if (string.IsNullOrWhiteSpace(urlOrPath)) + { + return null; + } + + if (Uri.TryCreate(urlOrPath, UriKind.Absolute, out var absoluteUri)) + { + return absoluteUri.ToString(); + } + + var baseUri = new Uri(baseUrl.TrimEnd('/') + "/"); + return new Uri(baseUri, urlOrPath.TrimStart('/')).ToString(); + } + + public static string ExtractNodeTextExcludingChildParagraph(HtmlNode node) + { + if (node == null) + { + return null; + } + + var clone = node.CloneNode(true); + var paragraphs = clone.SelectNodes("./p"); + if (paragraphs != null) + { + foreach (var paragraph in paragraphs) + { + paragraph.Remove(); + } + } + + return NormalizeWhitespace(clone.InnerText); + } +} diff --git a/SampleData/catalog-page-sample.html b/SampleData/catalog-page-sample.html new file mode 100644 index 0000000..0bc41fa --- /dev/null +++ b/SampleData/catalog-page-sample.html @@ -0,0 +1,31 @@ + + + + № в госреестре + 97957-26 + + + Наименование + Расходомеры-счетчики электромагнитные + + + ТипСчетовод + + + Описание типа + Скачать + Методики поверки + Скачать + + + МПИ60 мес. + + + Cвидетельствозавод. 
номер00033, 00032, 00031, 0003010.03.2031 + + + Производитель + Общество с ограниченной ответственностью «Производственная фирма «Гидродинамика» + + + diff --git a/SampleData/detail-page-sample.html b/SampleData/detail-page-sample.html new file mode 100644 index 0000000..72f9859 --- /dev/null +++ b/SampleData/detail-page-sample.html @@ -0,0 +1,22 @@ + + Номер в госреестре97957-26 + НаименованиеРасходомеры-счетчики электромагнитные + Обозначение типаСчетовод + ПроизводительОбщество с ограниченной ответственностью «Производственная фирма «Гидродинамика» + Описание типаСкачать + Методика поверкиСкачать + Межповерочный интервал (МПИ)60 мес. + Допускается поверка партии + Наличие периодической поверки + Сведения о типеЗаводской номер + Срок свидетельства или заводской номер10.03.2031 + Назначение + Описание + Программное обеспечение + Метрологические и технические характеристики + Комплектность + Поверка + Нормативные и технические документы + Заявитель + Испытательный центр + diff --git a/Services/DatabaseInitializer.cs b/Services/DatabaseInitializer.cs new file mode 100644 index 0000000..ba853db --- /dev/null +++ b/Services/DatabaseInitializer.cs @@ -0,0 +1,141 @@ +using Microsoft.Data.SqlClient; + +namespace CRAWLER.Services; + +internal sealed class DatabaseInitializer +{ + private readonly IDatabaseConnectionFactory _connectionFactory; + + public DatabaseInitializer(IDatabaseConnectionFactory connectionFactory) + { + _connectionFactory = connectionFactory; + } + + public async Task EnsureCreatedAsync(CancellationToken cancellationToken) + { + await EnsureDatabaseExistsAsync(cancellationToken); + await EnsureSchemaAsync(cancellationToken); + } + + private async Task EnsureDatabaseExistsAsync(CancellationToken cancellationToken) + { + await using var connection = _connectionFactory.CreateMasterConnection(); + await connection.OpenAsync(cancellationToken); + + var safeDatabaseName = _connectionFactory.Options.Database.Replace("]", "]]"); + var sql = $@" +IF 
DB_ID(N'{safeDatabaseName}') IS NULL +BEGIN + CREATE DATABASE [{safeDatabaseName}]; +END"; + + await using var command = new SqlCommand(sql, connection) + { + CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds + }; + await command.ExecuteNonQueryAsync(cancellationToken); + } + + private async Task EnsureSchemaAsync(CancellationToken cancellationToken) + { + await using var connection = _connectionFactory.CreateConnection(); + await connection.OpenAsync(cancellationToken); + + var scripts = new[] + { + @" +IF OBJECT_ID(N'dbo.Instruments', N'U') IS NULL +BEGIN + CREATE TABLE dbo.Instruments + ( + Id BIGINT IDENTITY(1,1) NOT NULL CONSTRAINT PK_Instruments PRIMARY KEY, + RegistryNumber NVARCHAR(64) NULL, + Name NVARCHAR(512) NOT NULL, + TypeDesignation NVARCHAR(512) NULL, + Manufacturer NVARCHAR(2000) NULL, + VerificationInterval NVARCHAR(512) NULL, + CertificateOrSerialNumber NVARCHAR(512) NULL, + AllowsBatchVerification NVARCHAR(256) NULL, + HasPeriodicVerification NVARCHAR(256) NULL, + TypeInfo NVARCHAR(256) NULL, + Purpose NVARCHAR(MAX) NULL, + Description NVARCHAR(MAX) NULL, + Software NVARCHAR(MAX) NULL, + MetrologicalCharacteristics NVARCHAR(MAX) NULL, + Completeness NVARCHAR(MAX) NULL, + Verification NVARCHAR(MAX) NULL, + RegulatoryDocuments NVARCHAR(MAX) NULL, + Applicant NVARCHAR(MAX) NULL, + TestCenter NVARCHAR(MAX) NULL, + DetailUrl NVARCHAR(1024) NULL, + SourceSystem NVARCHAR(64) NOT NULL CONSTRAINT DF_Instruments_SourceSystem DEFAULT N'Manual', + LastImportedAt DATETIME2 NULL, + CreatedAt DATETIME2 NOT NULL CONSTRAINT DF_Instruments_CreatedAt DEFAULT SYSUTCDATETIME(), + UpdatedAt DATETIME2 NOT NULL CONSTRAINT DF_Instruments_UpdatedAt DEFAULT SYSUTCDATETIME() + ); +END", + @" +IF NOT EXISTS ( + SELECT 1 + FROM sys.indexes + WHERE name = N'UX_Instruments_RegistryNumber' + AND object_id = OBJECT_ID(N'dbo.Instruments') +) +BEGIN + CREATE UNIQUE INDEX UX_Instruments_RegistryNumber + ON dbo.Instruments (RegistryNumber) + WHERE RegistryNumber IS 
/// <summary>
/// Lets the user choose PDF files.
/// </summary>
internal interface IFilePickerService
{
    /// <summary>Asks the user for one or several PDF files.</summary>
    /// <param name="multiselect">Whether more than one file may be selected.</param>
    /// <returns>The selected file names; empty when the dialog was not confirmed.</returns>
    IReadOnlyList<string> PickPdfFiles(bool multiselect);
}

/// <summary>
/// <see cref="IFilePickerService"/> implemented with <see cref="OpenFileDialog"/>.
/// </summary>
internal sealed class FilePickerService : IFilePickerService
{
    /// <inheritdoc />
    public IReadOnlyList<string> PickPdfFiles(bool multiselect)
    {
        var picker = new OpenFileDialog();
        picker.Filter = "PDF (*.pdf)|*.pdf";
        picker.Multiselect = multiselect;
        picker.CheckFileExists = true;
        picker.CheckPathExists = true;

        // ShowDialog returns a nullable bool; only an explicit "true" is a confirmation.
        var confirmed = picker.ShowDialog() == true;
        if (!confirmed)
        {
            return Array.Empty<string>();
        }

        return picker.FileNames;
    }
}
return _repository.GetByIdAsync(id, cancellationToken); + } + + public async Task SaveInstrumentAsync(InstrumentRecord record, IEnumerable pendingPdfPaths, CancellationToken cancellationToken) + { + var id = await _repository.SaveAsync(record, cancellationToken); + + if (pendingPdfPaths != null) + { + foreach (var sourcePath in pendingPdfPaths.Where(path => !string.IsNullOrWhiteSpace(path))) + { + var localPath = await _pdfStorageService.CopyFromLocalAsync(sourcePath, record.RegistryNumber, Path.GetFileNameWithoutExtension(sourcePath), cancellationToken); + await _repository.SaveAttachmentAsync(new PdfAttachment + { + InstrumentId = id, + Kind = "Ручной PDF", + Title = Path.GetFileNameWithoutExtension(sourcePath), + LocalPath = localPath, + SourceUrl = null, + IsManual = true + }, cancellationToken); + } + } + + return id; + } + + public async Task DeleteInstrumentAsync(InstrumentRecord record, CancellationToken cancellationToken) + { + if (record == null) + { + return; + } + + foreach (var attachment in record.Attachments) + { + _pdfStorageService.TryDelete(attachment.LocalPath); + } + + await _repository.DeleteInstrumentAsync(record.Id, cancellationToken); + } + + public async Task RemoveAttachmentAsync(PdfAttachment attachment, CancellationToken cancellationToken) + { + if (attachment == null) + { + return; + } + + _pdfStorageService.TryDelete(attachment.LocalPath); + await _repository.DeleteAttachmentAsync(attachment.Id, cancellationToken); + } + + public async Task> AddManualAttachmentsAsync(long instrumentId, string registryNumber, IEnumerable sourcePaths, CancellationToken cancellationToken) + { + if (sourcePaths == null) + { + return Array.Empty(); + } + + var added = new List(); + foreach (var sourcePath in sourcePaths.Where(path => !string.IsNullOrWhiteSpace(path))) + { + var localPath = await _pdfStorageService.CopyFromLocalAsync(sourcePath, registryNumber, Path.GetFileNameWithoutExtension(sourcePath), cancellationToken); + var attachment = new 
PdfAttachment + { + InstrumentId = instrumentId, + Kind = "Ручной PDF", + Title = Path.GetFileNameWithoutExtension(sourcePath), + SourceUrl = null, + LocalPath = localPath, + IsManual = true + }; + + await _repository.SaveAttachmentAsync(attachment, cancellationToken); + added.Add(attachment); + } + + return added; + } + + public async Task SyncFromSiteAsync(int pagesToScan, IProgress progress, CancellationToken cancellationToken) + { + var result = new SyncResult(); + var totalPages = Math.Max(1, pagesToScan); + + for (var page = 1; page <= totalPages; page++) + { + cancellationToken.ThrowIfCancellationRequested(); + progress?.Report($"Чтение страницы {page}..."); + + IReadOnlyList items; + try + { + var catalogHtml = await _client.GetStringAsync(_client.BuildCatalogPageUrl(page), cancellationToken); + items = _catalogPageParser.Parse(catalogHtml, _client.Options.BaseUrl); + result.PagesScanned++; + } + catch (Exception ex) + { + result.FailedPages++; + progress?.Report($"Страница {page} пропущена: {ex.Message}"); + continue; + } + + foreach (var item in items) + { + cancellationToken.ThrowIfCancellationRequested(); + progress?.Report($"Обработка {item.RegistryNumber ?? item.Name}..."); + + try + { + var existingId = await _repository.FindInstrumentIdByRegistryNumberAsync(item.RegistryNumber, cancellationToken); + var existing = existingId.HasValue + ? await _repository.GetByIdAsync(existingId.Value, cancellationToken) + : null; + + ParsedInstrumentDetails details = null; + if (!string.IsNullOrWhiteSpace(item.DetailUrl)) + { + try + { + var detailHtml = await _client.GetStringAsync(item.DetailUrl, cancellationToken); + details = _detailPageParser.Parse(detailHtml, _client.Options.BaseUrl); + } + catch + { + result.SkippedDetailRequests++; + } + } + + var merged = Merge(existing, item, details); + merged.Id = existing?.Id ?? 0; + merged.SourceSystem = "KtoPoverit"; + merged.DetailUrl = item.DetailUrl ?? 
existing?.DetailUrl; + merged.LastImportedAt = DateTime.UtcNow; + + var savedId = await _repository.SaveAsync(merged, cancellationToken); + result.ProcessedItems++; + if (existing == null) + { + result.AddedRecords++; + } + else + { + result.UpdatedRecords++; + } + + await SyncAttachmentAsync(savedId, merged.RegistryNumber, "Описание типа", details?.DescriptionTypePdfUrl ?? item.DescriptionTypePdfUrl, result, cancellationToken); + await SyncAttachmentAsync(savedId, merged.RegistryNumber, "Методика поверки", details?.MethodologyPdfUrl ?? item.MethodologyPdfUrl, result, cancellationToken); + + if (_client.Options.RequestDelayMilliseconds > 0) + { + await Task.Delay(_client.Options.RequestDelayMilliseconds, cancellationToken); + } + } + catch (Exception ex) + { + result.FailedItems++; + progress?.Report($"Запись {item.RegistryNumber ?? item.Name} пропущена: {ex.Message}"); + } + } + } + + progress?.Report($"Готово: страниц {result.PagesScanned}, записей {result.ProcessedItems}, проблемных записей {result.FailedItems}."); + return result; + } + + private async Task SyncAttachmentAsync(long instrumentId, string registryNumber, string title, string sourceUrl, SyncResult result, CancellationToken cancellationToken) + { + if (string.IsNullOrWhiteSpace(sourceUrl)) + { + return; + } + + var existing = await _repository.FindAttachmentBySourceUrlAsync(instrumentId, sourceUrl, cancellationToken); + if (existing != null && !string.IsNullOrWhiteSpace(existing.LocalPath) && File.Exists(existing.LocalPath)) + { + return; + } + + try + { + var localPath = await _pdfStorageService.DownloadAsync(sourceUrl, registryNumber, title, cancellationToken); + var attachment = existing ?? 
new PdfAttachment + { + InstrumentId = instrumentId, + IsManual = false + }; + + attachment.Kind = title; + attachment.Title = title; + attachment.SourceUrl = sourceUrl; + attachment.LocalPath = localPath; + + await _repository.SaveAttachmentAsync(attachment, cancellationToken); + result.DownloadedPdfFiles++; + } + catch + { + result.FailedPdfFiles++; + + if (existing == null) + { + await _repository.SaveAttachmentAsync(new PdfAttachment + { + InstrumentId = instrumentId, + Kind = title, + Title = title, + SourceUrl = sourceUrl, + LocalPath = null, + IsManual = false + }, cancellationToken); + } + } + } + + private static InstrumentRecord Merge(InstrumentRecord existing, CatalogListItem item, ParsedInstrumentDetails details) + { + var result = existing?.Clone() ?? new InstrumentRecord(); + + result.RegistryNumber = Prefer(details?.RegistryNumber, item?.RegistryNumber, existing?.RegistryNumber); + result.Name = Prefer(details?.Name, item?.Name, existing?.Name) ?? "Без названия"; + result.TypeDesignation = Prefer(details?.TypeDesignation, item?.TypeDesignation, existing?.TypeDesignation); + result.Manufacturer = Prefer(details?.Manufacturer, item?.Manufacturer, existing?.Manufacturer); + result.VerificationInterval = Prefer(details?.VerificationInterval, item?.VerificationInterval, existing?.VerificationInterval); + result.CertificateOrSerialNumber = Prefer(details?.CertificateOrSerialNumber, item?.CertificateOrSerialNumber, existing?.CertificateOrSerialNumber); + result.AllowsBatchVerification = Prefer(details?.AllowsBatchVerification, existing?.AllowsBatchVerification); + result.HasPeriodicVerification = Prefer(details?.HasPeriodicVerification, existing?.HasPeriodicVerification); + result.TypeInfo = Prefer(details?.TypeInfo, existing?.TypeInfo); + result.Purpose = Prefer(details?.Purpose, existing?.Purpose); + result.Description = Prefer(details?.Description, existing?.Description); + result.Software = Prefer(details?.Software, existing?.Software); + 
result.MetrologicalCharacteristics = Prefer(details?.MetrologicalCharacteristics, existing?.MetrologicalCharacteristics); + result.Completeness = Prefer(details?.Completeness, existing?.Completeness); + result.Verification = Prefer(details?.Verification, existing?.Verification); + result.RegulatoryDocuments = Prefer(details?.RegulatoryDocuments, existing?.RegulatoryDocuments); + result.Applicant = Prefer(details?.Applicant, existing?.Applicant); + result.TestCenter = Prefer(details?.TestCenter, existing?.TestCenter); + return result; + } + + private static string Prefer(params string[] values) + { + foreach (var value in values) + { + if (!string.IsNullOrWhiteSpace(value)) + { + return value.Trim(); + } + } + + return null; + } +} diff --git a/Services/InstrumentRepository.cs b/Services/InstrumentRepository.cs new file mode 100644 index 0000000..e32ef3c --- /dev/null +++ b/Services/InstrumentRepository.cs @@ -0,0 +1,526 @@ +using CRAWLER.Models; +using Microsoft.Data.SqlClient; + +namespace CRAWLER.Services; + +internal sealed class InstrumentRepository +{ + private readonly IDatabaseConnectionFactory _connectionFactory; + + public InstrumentRepository(IDatabaseConnectionFactory connectionFactory) + { + _connectionFactory = connectionFactory; + } + + public async Task> SearchAsync(string searchText, CancellationToken cancellationToken) + { + var items = new List(); + var hasFilter = !string.IsNullOrWhiteSpace(searchText); + + const string sql = @" +SELECT TOP (500) + Id, + RegistryNumber, + Name, + TypeDesignation, + Manufacturer, + VerificationInterval, + SourceSystem, + UpdatedAt +FROM dbo.Instruments +WHERE @Search IS NULL + OR RegistryNumber LIKE @Like + OR Name LIKE @Like + OR TypeDesignation LIKE @Like + OR Manufacturer LIKE @Like +ORDER BY + CASE WHEN RegistryNumber IS NULL OR RegistryNumber = N'' THEN 1 ELSE 0 END, + RegistryNumber DESC, + UpdatedAt DESC;"; + + await using var connection = _connectionFactory.CreateConnection(); + await 
connection.OpenAsync(cancellationToken); + + await using var command = new SqlCommand(sql, connection) + { + CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds + }; + command.Parameters.AddWithValue("@Search", hasFilter ? searchText.Trim() : DBNull.Value); + command.Parameters.AddWithValue("@Like", hasFilter ? $"%{searchText.Trim()}%" : DBNull.Value); + + await using var reader = await command.ExecuteReaderAsync(cancellationToken); + while (await reader.ReadAsync(cancellationToken)) + { + items.Add(new InstrumentSummary + { + Id = reader.GetInt64(0), + RegistryNumber = GetString(reader, 1), + Name = GetString(reader, 2), + TypeDesignation = GetString(reader, 3), + Manufacturer = GetString(reader, 4), + VerificationInterval = GetString(reader, 5), + SourceSystem = GetString(reader, 6), + UpdatedAt = reader.GetDateTime(7) + }); + } + + return items; + } + + public async Task GetByIdAsync(long id, CancellationToken cancellationToken) + { + const string sql = @" +SELECT + Id, + RegistryNumber, + Name, + TypeDesignation, + Manufacturer, + VerificationInterval, + CertificateOrSerialNumber, + AllowsBatchVerification, + HasPeriodicVerification, + TypeInfo, + Purpose, + Description, + Software, + MetrologicalCharacteristics, + Completeness, + Verification, + RegulatoryDocuments, + Applicant, + TestCenter, + DetailUrl, + SourceSystem, + LastImportedAt, + CreatedAt, + UpdatedAt +FROM dbo.Instruments +WHERE Id = @Id;"; + + await using var connection = _connectionFactory.CreateConnection(); + await connection.OpenAsync(cancellationToken); + + await using var command = new SqlCommand(sql, connection) + { + CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds + }; + command.Parameters.AddWithValue("@Id", id); + + InstrumentRecord item = null; + await using (var reader = await command.ExecuteReaderAsync(cancellationToken)) + { + if (await reader.ReadAsync(cancellationToken)) + { + item = new InstrumentRecord + { + Id = reader.GetInt64(0), + 
RegistryNumber = GetString(reader, 1), + Name = GetString(reader, 2), + TypeDesignation = GetString(reader, 3), + Manufacturer = GetString(reader, 4), + VerificationInterval = GetString(reader, 5), + CertificateOrSerialNumber = GetString(reader, 6), + AllowsBatchVerification = GetString(reader, 7), + HasPeriodicVerification = GetString(reader, 8), + TypeInfo = GetString(reader, 9), + Purpose = GetString(reader, 10), + Description = GetString(reader, 11), + Software = GetString(reader, 12), + MetrologicalCharacteristics = GetString(reader, 13), + Completeness = GetString(reader, 14), + Verification = GetString(reader, 15), + RegulatoryDocuments = GetString(reader, 16), + Applicant = GetString(reader, 17), + TestCenter = GetString(reader, 18), + DetailUrl = GetString(reader, 19), + SourceSystem = GetString(reader, 20), + LastImportedAt = reader.IsDBNull(21) ? (DateTime?)null : reader.GetDateTime(21), + CreatedAt = reader.GetDateTime(22), + UpdatedAt = reader.GetDateTime(23) + }; + } + } + + if (item == null) + { + return null; + } + + item.Attachments = (await GetAttachmentsAsync(connection, id, cancellationToken)).ToList(); + return item; + } + + public async Task FindInstrumentIdByRegistryNumberAsync(string registryNumber, CancellationToken cancellationToken) + { + if (string.IsNullOrWhiteSpace(registryNumber)) + { + return null; + } + + const string sql = "SELECT Id FROM dbo.Instruments WHERE RegistryNumber = @RegistryNumber;"; + + await using var connection = _connectionFactory.CreateConnection(); + await connection.OpenAsync(cancellationToken); + + await using var command = new SqlCommand(sql, connection) + { + CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds + }; + command.Parameters.AddWithValue("@RegistryNumber", registryNumber.Trim()); + + var result = await command.ExecuteScalarAsync(cancellationToken); + if (result == null || result == DBNull.Value) + { + return null; + } + + return Convert.ToInt64(result); + } + + public async Task 
SaveAsync(InstrumentRecord record, CancellationToken cancellationToken) + { + if (record == null) + { + throw new ArgumentNullException(nameof(record)); + } + + await using var connection = _connectionFactory.CreateConnection(); + await connection.OpenAsync(cancellationToken); + + if (record.Id <= 0) + { + const string insertSql = @" +INSERT INTO dbo.Instruments +( + RegistryNumber, + Name, + TypeDesignation, + Manufacturer, + VerificationInterval, + CertificateOrSerialNumber, + AllowsBatchVerification, + HasPeriodicVerification, + TypeInfo, + Purpose, + Description, + Software, + MetrologicalCharacteristics, + Completeness, + Verification, + RegulatoryDocuments, + Applicant, + TestCenter, + DetailUrl, + SourceSystem, + LastImportedAt, + CreatedAt, + UpdatedAt +) +OUTPUT INSERTED.Id +VALUES +( + @RegistryNumber, + @Name, + @TypeDesignation, + @Manufacturer, + @VerificationInterval, + @CertificateOrSerialNumber, + @AllowsBatchVerification, + @HasPeriodicVerification, + @TypeInfo, + @Purpose, + @Description, + @Software, + @MetrologicalCharacteristics, + @Completeness, + @Verification, + @RegulatoryDocuments, + @Applicant, + @TestCenter, + @DetailUrl, + @SourceSystem, + @LastImportedAt, + SYSUTCDATETIME(), + SYSUTCDATETIME() +);"; + + await using var command = CreateRecordCommand(insertSql, connection, record); + var id = await command.ExecuteScalarAsync(cancellationToken); + return Convert.ToInt64(id); + } + + const string updateSql = @" +UPDATE dbo.Instruments +SET + RegistryNumber = @RegistryNumber, + Name = @Name, + TypeDesignation = @TypeDesignation, + Manufacturer = @Manufacturer, + VerificationInterval = @VerificationInterval, + CertificateOrSerialNumber = @CertificateOrSerialNumber, + AllowsBatchVerification = @AllowsBatchVerification, + HasPeriodicVerification = @HasPeriodicVerification, + TypeInfo = @TypeInfo, + Purpose = @Purpose, + Description = @Description, + Software = @Software, + MetrologicalCharacteristics = @MetrologicalCharacteristics, + 
Completeness = @Completeness, + Verification = @Verification, + RegulatoryDocuments = @RegulatoryDocuments, + Applicant = @Applicant, + TestCenter = @TestCenter, + DetailUrl = @DetailUrl, + SourceSystem = @SourceSystem, + LastImportedAt = @LastImportedAt, + UpdatedAt = SYSUTCDATETIME() +WHERE Id = @Id;"; + + await using (var command = CreateRecordCommand(updateSql, connection, record)) + { + command.Parameters.AddWithValue("@Id", record.Id); + await command.ExecuteNonQueryAsync(cancellationToken); + } + + return record.Id; + } + + public async Task FindAttachmentBySourceUrlAsync(long instrumentId, string sourceUrl, CancellationToken cancellationToken) + { + if (string.IsNullOrWhiteSpace(sourceUrl)) + { + return null; + } + + const string sql = @" +SELECT + Id, + InstrumentId, + Kind, + Title, + SourceUrl, + LocalPath, + IsManual, + CreatedAt +FROM dbo.PdfAttachments +WHERE InstrumentId = @InstrumentId + AND SourceUrl = @SourceUrl;"; + + await using var connection = _connectionFactory.CreateConnection(); + await connection.OpenAsync(cancellationToken); + + await using var command = new SqlCommand(sql, connection) + { + CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds + }; + command.Parameters.AddWithValue("@InstrumentId", instrumentId); + command.Parameters.AddWithValue("@SourceUrl", sourceUrl); + + await using var reader = await command.ExecuteReaderAsync(cancellationToken); + if (!await reader.ReadAsync(cancellationToken)) + { + return null; + } + + return new PdfAttachment + { + Id = reader.GetInt64(0), + InstrumentId = reader.GetInt64(1), + Kind = GetString(reader, 2), + Title = GetString(reader, 3), + SourceUrl = GetString(reader, 4), + LocalPath = GetString(reader, 5), + IsManual = reader.GetBoolean(6), + CreatedAt = reader.GetDateTime(7) + }; + } + + public async Task SaveAttachmentAsync(PdfAttachment attachment, CancellationToken cancellationToken) + { + if (attachment == null) + { + throw new ArgumentNullException(nameof(attachment)); + } + 
+ await using var connection = _connectionFactory.CreateConnection(); + await connection.OpenAsync(cancellationToken); + + if (attachment.Id <= 0) + { + const string insertSql = @" +INSERT INTO dbo.PdfAttachments +( + InstrumentId, + Kind, + Title, + SourceUrl, + LocalPath, + IsManual, + CreatedAt +) +VALUES +( + @InstrumentId, + @Kind, + @Title, + @SourceUrl, + @LocalPath, + @IsManual, + SYSUTCDATETIME() +);"; + + await using var command = CreateAttachmentCommand(insertSql, connection, attachment); + await command.ExecuteNonQueryAsync(cancellationToken); + return; + } + + const string updateSql = @" +UPDATE dbo.PdfAttachments +SET + Kind = @Kind, + Title = @Title, + SourceUrl = @SourceUrl, + LocalPath = @LocalPath, + IsManual = @IsManual +WHERE Id = @Id;"; + + await using (var command = CreateAttachmentCommand(updateSql, connection, attachment)) + { + command.Parameters.AddWithValue("@Id", attachment.Id); + await command.ExecuteNonQueryAsync(cancellationToken); + } + } + + public async Task DeleteAttachmentAsync(long attachmentId, CancellationToken cancellationToken) + { + const string sql = "DELETE FROM dbo.PdfAttachments WHERE Id = @Id;"; + + await using var connection = _connectionFactory.CreateConnection(); + await connection.OpenAsync(cancellationToken); + + await using var command = new SqlCommand(sql, connection) + { + CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds + }; + command.Parameters.AddWithValue("@Id", attachmentId); + await command.ExecuteNonQueryAsync(cancellationToken); + } + + public async Task DeleteInstrumentAsync(long id, CancellationToken cancellationToken) + { + const string sql = "DELETE FROM dbo.Instruments WHERE Id = @Id;"; + + await using var connection = _connectionFactory.CreateConnection(); + await connection.OpenAsync(cancellationToken); + + await using var command = new SqlCommand(sql, connection) + { + CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds + }; + command.Parameters.AddWithValue("@Id", 
id); + await command.ExecuteNonQueryAsync(cancellationToken); + } + + private async Task> GetAttachmentsAsync(SqlConnection connection, long instrumentId, CancellationToken cancellationToken) + { + const string sql = @" +SELECT + Id, + InstrumentId, + Kind, + Title, + SourceUrl, + LocalPath, + IsManual, + CreatedAt +FROM dbo.PdfAttachments +WHERE InstrumentId = @InstrumentId +ORDER BY CreatedAt DESC, Id DESC;"; + + var items = new List(); + + await using var command = new SqlCommand(sql, connection) + { + CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds + }; + command.Parameters.AddWithValue("@InstrumentId", instrumentId); + + await using var reader = await command.ExecuteReaderAsync(cancellationToken); + while (await reader.ReadAsync(cancellationToken)) + { + items.Add(new PdfAttachment + { + Id = reader.GetInt64(0), + InstrumentId = reader.GetInt64(1), + Kind = GetString(reader, 2), + Title = GetString(reader, 3), + SourceUrl = GetString(reader, 4), + LocalPath = GetString(reader, 5), + IsManual = reader.GetBoolean(6), + CreatedAt = reader.GetDateTime(7) + }); + } + + return items; + } + + private SqlCommand CreateRecordCommand(string sql, SqlConnection connection, InstrumentRecord record) + { + var command = new SqlCommand(sql, connection) + { + CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds + }; + + command.Parameters.AddWithValue("@RegistryNumber", ToDbValue(record.RegistryNumber)); + command.Parameters.AddWithValue("@Name", string.IsNullOrWhiteSpace(record.Name) ? 
"Без названия" : record.Name.Trim()); + command.Parameters.AddWithValue("@TypeDesignation", ToDbValue(record.TypeDesignation)); + command.Parameters.AddWithValue("@Manufacturer", ToDbValue(record.Manufacturer)); + command.Parameters.AddWithValue("@VerificationInterval", ToDbValue(record.VerificationInterval)); + command.Parameters.AddWithValue("@CertificateOrSerialNumber", ToDbValue(record.CertificateOrSerialNumber)); + command.Parameters.AddWithValue("@AllowsBatchVerification", ToDbValue(record.AllowsBatchVerification)); + command.Parameters.AddWithValue("@HasPeriodicVerification", ToDbValue(record.HasPeriodicVerification)); + command.Parameters.AddWithValue("@TypeInfo", ToDbValue(record.TypeInfo)); + command.Parameters.AddWithValue("@Purpose", ToDbValue(record.Purpose)); + command.Parameters.AddWithValue("@Description", ToDbValue(record.Description)); + command.Parameters.AddWithValue("@Software", ToDbValue(record.Software)); + command.Parameters.AddWithValue("@MetrologicalCharacteristics", ToDbValue(record.MetrologicalCharacteristics)); + command.Parameters.AddWithValue("@Completeness", ToDbValue(record.Completeness)); + command.Parameters.AddWithValue("@Verification", ToDbValue(record.Verification)); + command.Parameters.AddWithValue("@RegulatoryDocuments", ToDbValue(record.RegulatoryDocuments)); + command.Parameters.AddWithValue("@Applicant", ToDbValue(record.Applicant)); + command.Parameters.AddWithValue("@TestCenter", ToDbValue(record.TestCenter)); + command.Parameters.AddWithValue("@DetailUrl", ToDbValue(record.DetailUrl)); + command.Parameters.AddWithValue("@SourceSystem", string.IsNullOrWhiteSpace(record.SourceSystem) ? "Manual" : record.SourceSystem.Trim()); + command.Parameters.AddWithValue("@LastImportedAt", record.LastImportedAt.HasValue ? 
record.LastImportedAt.Value : DBNull.Value);
+
+        return command;
+    }
+
+    private SqlCommand CreateAttachmentCommand(string sql, SqlConnection connection, PdfAttachment attachment)
+    {
+        var command = new SqlCommand(sql, connection)
+        {
+            CommandTimeout = _connectionFactory.Options.CommandTimeoutSeconds
+        };
+
+        command.Parameters.AddWithValue("@InstrumentId", attachment.InstrumentId);
+        command.Parameters.AddWithValue("@Kind", string.IsNullOrWhiteSpace(attachment.Kind) ? "PDF" : attachment.Kind.Trim());
+        command.Parameters.AddWithValue("@Title", ToDbValue(attachment.Title));
+        command.Parameters.AddWithValue("@SourceUrl", ToDbValue(attachment.SourceUrl));
+        command.Parameters.AddWithValue("@LocalPath", ToDbValue(attachment.LocalPath));
+        command.Parameters.AddWithValue("@IsManual", attachment.IsManual);
+
+        return command;
+    }
+
+    private static object ToDbValue(string value)
+    {
+        return string.IsNullOrWhiteSpace(value) ? DBNull.Value : value.Trim();
+    }
+
+    private static string GetString(SqlDataReader reader, int index)
+    {
+        return reader.IsDBNull(index) ? null : reader.GetString(index);
+    }
+}
diff --git a/Services/KtoPoveritClient.cs b/Services/KtoPoveritClient.cs
new file mode 100644
index 0000000..17f7230
--- /dev/null
+++ b/Services/KtoPoveritClient.cs
@@ -0,0 +1,215 @@
+using System.Net;
+using System.Net.Http;
+using System.Net.Http.Headers;
+using CRAWLER.Configuration;
+using Microsoft.Extensions.Configuration;
+
+namespace CRAWLER.Services;
+
+/// <summary>
+/// HTTP client for the site configured in the <c>Crawler</c> settings section; redirects are followed by hand.
+/// </summary>
+internal sealed class KtoPoveritClient : IDisposable
+{
+    private readonly CrawlerOptions _options;
+    private readonly HttpClient _httpClient;
+
+    public KtoPoveritClient(IConfiguration configuration)
+    {
+        _options = configuration.GetSection("Crawler").Get<CrawlerOptions>()
+            ?? throw new InvalidOperationException("Раздел Crawler не найден в appsettings.json.");
+
+        var handler = new SocketsHttpHandler
+        {
+            AutomaticDecompression = DecompressionMethods.All,
+            AllowAutoRedirect = false
+        };
+
+        _httpClient = new HttpClient(handler)
+        {
+            Timeout = TimeSpan.FromSeconds(Math.Max(5, _options.TimeoutSeconds))
+        };
+
+        // Skip the header when UserAgent is not configured instead of handing a blank value to ParseAdd.
+        if (!string.IsNullOrWhiteSpace(_options.UserAgent))
+        {
+            _httpClient.DefaultRequestHeaders.UserAgent.ParseAdd(_options.UserAgent);
+        }
+
+        _httpClient.DefaultRequestHeaders.AcceptLanguage.ParseAdd("ru-RU,ru;q=0.9,en-US;q=0.8");
+    }
+
+    public CrawlerOptions Options
+    {
+        get { return _options; }
+    }
+
+    /// <summary>Downloads <paramref name="url"/> and returns the response body as text.</summary>
+    public async Task<string> GetStringAsync(string url, CancellationToken cancellationToken)
+    {
+        using var request = CreateRequest(url);
+        using var response = await SendAsync(request, cancellationToken);
+        return await response.Content.ReadAsStringAsync(cancellationToken);
+    }
+
+    /// <summary>Downloads <paramref name="url"/> and returns the response body as raw bytes.</summary>
+    public async Task<byte[]> GetBytesAsync(string url, CancellationToken cancellationToken)
+    {
+        using var request = CreateRequest(url);
+        using var response = await SendAsync(request, cancellationToken);
+        return await response.Content.ReadAsByteArrayAsync(cancellationToken);
+    }
+
+    public string BuildCatalogPageUrl(int page)
+    {
+        // Invariant formatting: the page number goes into a URL, not into UI text.
+        var relative = string.Format(System.Globalization.CultureInfo.InvariantCulture, _options.CatalogPathFormat, page);
+        return BuildAbsoluteUrl(relative);
+    }
+
+    public string BuildAbsoluteUrl(string urlOrPath)
+    {
+        if (string.IsNullOrWhiteSpace(urlOrPath))
+        {
+            return null;
+        }
+
+        if (Uri.TryCreate(urlOrPath, UriKind.Absolute, out var absoluteUri))
+        {
+            return absoluteUri.ToString();
+        }
+
+        var baseUri = new Uri(_options.BaseUrl.TrimEnd('/') + "/");
+        return new Uri(baseUri, urlOrPath.TrimStart('/')).ToString();
+    }
+
+    private HttpRequestMessage CreateRequest(string url)
+    {
+        return new HttpRequestMessage(HttpMethod.Get, url)
+        {
+            Version = HttpVersion.Version11,
+            VersionPolicy = HttpVersionPolicy.RequestVersionOrLower
+        };
+    }
+
+    /// <summary>
+    /// Issues GET requests starting at the URI of <paramref name="request"/>, following up to ten redirects by hand.
+    /// </summary>
+    /// <returns>The first response with a 2xx status; the caller disposes it.</returns>
+    /// <exception cref="InvalidOperationException">Any failure other than a cancellation requested through <paramref name="cancellationToken"/>.</exception>
+    private async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
+    {
+        var currentUri = request.RequestUri ?? throw new InvalidOperationException("Не задан URL запроса.");
+        const int maxRedirects = 10;
+
+        try
+        {
+            for (var redirectIndex = 0; redirectIndex <= maxRedirects; redirectIndex++)
+            {
+                using var currentRequest = CreateRequest(currentUri.ToString());
+                var response = await _httpClient.SendAsync(currentRequest, HttpCompletionOption.ResponseContentRead, cancellationToken);
+                var handedToCaller = false;
+
+                try
+                {
+                    var statusCode = (int)response.StatusCode;
+
+                    if (IsRedirectStatusCode(response.StatusCode))
+                    {
+                        var redirectUri = ResolveRedirectUri(currentUri, response.Headers);
+                        if (redirectUri == null)
+                        {
+                            throw new InvalidOperationException(
+                                $"Сайт вернул {statusCode} для {currentUri}, но не прислал корректный адрес перенаправления.");
+                        }
+
+                        currentUri = redirectUri;
+                        continue;
+                    }
+
+                    if (statusCode >= 200 && statusCode <= 299)
+                    {
+                        handedToCaller = true;
+                        return response;
+                    }
+
+                    throw new HttpRequestException(
+                        $"Response status code does not indicate success: {statusCode} ({response.ReasonPhrase}).");
+                }
+                finally
+                {
+                    // Every path except the successful return releases the response, including exceptions.
+                    if (!handedToCaller)
+                    {
+                        response.Dispose();
+                    }
+                }
+            }
+
+            throw new InvalidOperationException(
+                $"Превышено число перенаправлений ({maxRedirects}) для {currentUri}.");
+        }
+        catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
+        {
+            // Caller-requested cancellation is left unwrapped so it can still be caught as OperationCanceledException.
+            throw new InvalidOperationException(
+                $"Не удалось получить данные с сайта Кто поверит: {request.RequestUri}. {ex.Message}",
+                ex);
+        }
+    }
+
+    private static bool IsRedirectStatusCode(HttpStatusCode statusCode)
+    {
+        return statusCode == HttpStatusCode.Moved
+            || statusCode == HttpStatusCode.Redirect
+            || statusCode == HttpStatusCode.RedirectMethod
+            || statusCode == HttpStatusCode.TemporaryRedirect
+            || (int)statusCode == 308;
+    }
+
+    private static Uri ResolveRedirectUri(Uri currentUri, HttpResponseHeaders headers)
+    {
+        if (headers.Location != null)
+        {
+            return headers.Location.IsAbsoluteUri
+                ? headers.Location
+                : new Uri(currentUri, headers.Location);
+        }
+
+        if (!headers.TryGetValues("Location", out var values))
+        {
+            return null;
+        }
+
+        var rawLocation = values.FirstOrDefault();
+        if (string.IsNullOrWhiteSpace(rawLocation))
+        {
+            return null;
+        }
+
+        if (Uri.TryCreate(rawLocation, UriKind.Absolute, out var absoluteUri))
+        {
+            return absoluteUri;
+        }
+
+        if (Uri.TryCreate(currentUri, rawLocation, out var relativeUri))
+        {
+            return relativeUri;
+        }
+
+        var escaped = Uri.EscapeUriString(rawLocation);
+        if (Uri.TryCreate(escaped, UriKind.Absolute, out absoluteUri))
+        {
+            return absoluteUri;
+        }
+
+        return Uri.TryCreate(currentUri, escaped, out relativeUri)
+            ? relativeUri
+            : null;
+    }
+
+    public void Dispose()
+    {
+        _httpClient.Dispose();
+    }
+}
diff --git a/Services/PdfShellService.cs b/Services/PdfShellService.cs
new file mode 100644
index 0000000..317e4fd
--- /dev/null
+++ b/Services/PdfShellService.cs
@@ -0,0 +1,56 @@
+using System.Diagnostics;
+using System.IO;
+using CRAWLER.Models;
+
+namespace CRAWLER.Services;
+
+/// <summary>Opens PDF attachments and links through the operating-system shell.</summary>
+internal interface IPdfOpener
+{
+    void OpenAttachment(PdfAttachment attachment);
+    void OpenUri(string uri);
+}
+
+internal sealed class PdfShellService : IPdfOpener
+{
+    /// <summary>Opens the local copy when the file exists, otherwise the attachment's web source.</summary>
+    public void OpenAttachment(PdfAttachment attachment)
+    {
+        if (attachment == null)
+        {
+            return;
+        }
+
+        if (!string.IsNullOrWhiteSpace(attachment.LocalPath) && File.Exists(attachment.LocalPath))
+        {
+            OpenUri(attachment.LocalPath);
+            return;
+        }
+
+        // SourceUrl may originate from crawled pages: only http/https links are handed to the shell.
+        if (IsWebUrl(attachment.SourceUrl))
+        {
+            OpenUri(attachment.SourceUrl);
+        }
+    }
+
+    /// <summary>Starts <paramref name="uri"/> through the shell; blank values are ignored.</summary>
+    public void OpenUri(string uri)
+    {
+        if (string.IsNullOrWhiteSpace(uri))
+        {
+            return;
+        }
+
+        Process.Start(new ProcessStartInfo(uri)
+        {
+            UseShellExecute = true
+        });
+    }
+
+    private static bool IsWebUrl(string value)
+    {
+        return Uri.TryCreate(value, UriKind.Absolute, out var parsed)
+            && (parsed.Scheme == Uri.UriSchemeHttp || parsed.Scheme == Uri.UriSchemeHttps);
+    }
+}
diff --git a/Services/PdfStorageService.cs b/Services/PdfStorageService.cs
new file mode 100644
index 0000000..df558cd
--- /dev/null
+++ b/Services/PdfStorageService.cs
@@ -0,0 +1,90 @@
+using System.IO;
+using System.Linq;
+using CRAWLER.Configuration;
+using Microsoft.Extensions.Configuration;
+
+namespace CRAWLER.Services;
+
+/// <summary>Stores PDF files under the configured root folder, one sub-folder per registry number.</summary>
+internal sealed class PdfStorageService
+{
+    private readonly KtoPoveritClient _client;
+    private readonly string _rootPath;
+
+    public PdfStorageService(IConfiguration configuration, KtoPoveritClient client)
+    {
+        _client = client;
+        var options = configuration.GetSection("Crawler").Get<CrawlerOptions>()
+            ?? throw new InvalidOperationException("Раздел Crawler не найден в appsettings.json.");
+        _rootPath = Environment.ExpandEnvironmentVariables(options.PdfStoragePath);
+        Directory.CreateDirectory(_rootPath);
+    }
+
+    public async Task<string> DownloadAsync(string sourceUrl, string registryNumber, string title, CancellationToken cancellationToken)
+    {
+        var bytes = await _client.GetBytesAsync(sourceUrl, cancellationToken);
+        var fullPath = BuildTargetPath(registryNumber, title, sourceUrl);
+        await File.WriteAllBytesAsync(fullPath, bytes, cancellationToken);
+        return fullPath;
+    }
+
+    public async Task<string> CopyFromLocalAsync(string sourcePath, string registryNumber, string title, CancellationToken cancellationToken)
+    {
+        var fullPath = BuildTargetPath(registryNumber, title, sourcePath);
+
+        await using var sourceStream = File.Open(sourcePath, FileMode.Open, FileAccess.Read, FileShare.Read);
+        await using var targetStream = File.Create(fullPath);
+        await sourceStream.CopyToAsync(targetStream, cancellationToken);
+        return fullPath;
+    }
+
+    public void TryDelete(string path)
+    {
+        try
+        {
+            if (!string.IsNullOrWhiteSpace(path) && File.Exists(path))
+            {
+                File.Delete(path);
+            }
+        }
+        catch
+        {
+            // Best effort by design: a file that cannot be removed is left in place.
+        }
+    }
+
+    private string BuildTargetPath(string registryNumber, string title, string sourceIdentity)
+    {
+        var safeFolder = MakeSafePathSegment(string.IsNullOrWhiteSpace(registryNumber) ? "manual" : registryNumber);
+        var folder = Path.Combine(_rootPath, safeFolder);
+        Directory.CreateDirectory(folder);
+
+        // MakeSafePathSegment never returns a blank string, so baseName needs no further fallback.
+        var baseName = MakeSafePathSegment(string.IsNullOrWhiteSpace(title) ? Path.GetFileNameWithoutExtension(sourceIdentity) : title);
+
+        var fullPath = Path.Combine(folder, baseName + ".pdf");
+        if (!File.Exists(fullPath))
+        {
+            return fullPath;
+        }
+
+        var counter = 2;
+        while (true)
+        {
+            var candidate = Path.Combine(folder, $"{baseName}-{counter}.pdf");
+            if (!File.Exists(candidate))
+            {
+                return candidate;
+            }
+
+            counter++;
+        }
+    }
+
+    private static string MakeSafePathSegment(string value)
+    {
+        var invalid = Path.GetInvalidFileNameChars();
+        var cleaned = new string((value ?? string.Empty).Select(ch => invalid.Contains(ch) ? '_' : ch).ToArray()).Trim();
+        return string.IsNullOrWhiteSpace(cleaned) ? "file" : cleaned;
+    }
+}
diff --git a/Services/SqlServerConnectionFactory.cs b/Services/SqlServerConnectionFactory.cs
new file mode 100644
index 0000000..57b9579
--- /dev/null
+++ b/Services/SqlServerConnectionFactory.cs
@@ -0,0 +1,56 @@
+using CRAWLER.Configuration;
+using Microsoft.Data.SqlClient;
+using Microsoft.Extensions.Configuration;
+
+namespace CRAWLER.Services;
+
+internal interface IDatabaseConnectionFactory
+{
+    SqlConnection CreateConnection();
+    SqlConnection CreateMasterConnection();
+    DatabaseOptions Options { get; }
+}
+
+/// <summary>Builds SQL Server connections from the <c>Database</c> settings section.</summary>
+internal sealed class SqlServerConnectionFactory : IDatabaseConnectionFactory
+{
+    public SqlServerConnectionFactory(IConfiguration configuration)
+    {
+        Options = configuration.GetSection("Database").Get<DatabaseOptions>()
+            ?? throw new InvalidOperationException("Раздел Database не найден в appsettings.json.");
+    }
+
+    public DatabaseOptions Options { get; }
+
+    public SqlConnection CreateConnection()
+    {
+        return new SqlConnection(BuildConnectionString(Options.Database));
+    }
+
+    public SqlConnection CreateMasterConnection()
+    {
+        return new SqlConnection(BuildConnectionString("master"));
+    }
+
+    private string BuildConnectionString(string databaseName)
+    {
+        var builder = new SqlConnectionStringBuilder
+        {
+            ApplicationName = Options.ApplicationName,
+            DataSource = Options.Server,
+            InitialCatalog = databaseName,
+            ConnectTimeout = Options.ConnectTimeoutSeconds,
+            Encrypt = Options.Encrypt,
+            IntegratedSecurity = Options.IntegratedSecurity,
+            MultipleActiveResultSets = Options.MultipleActiveResultSets,
+            Pooling = Options.Pooling,
+            MaxPoolSize = Options.MaxPoolSize,
+            MinPoolSize = Options.MinPoolSize,
+            TrustServerCertificate = Options.TrustServerCertificate,
+            ConnectRetryCount = Options.ConnectRetryCount,
+            ConnectRetryInterval = Options.ConnectRetryIntervalSeconds
+        };
+
+        return builder.ConnectionString;
+    }
+}
diff --git a/ViewModels/EditInstrumentWindowViewModel.cs b/ViewModels/EditInstrumentWindowViewModel.cs
new file mode 100644
index 0000000..9cdeeac
--- /dev/null
+++ b/ViewModels/EditInstrumentWindowViewModel.cs
@@ -0,0 +1,87 @@
+using System.Collections.ObjectModel;
+using System.Linq;
+using CRAWLER.Infrastructure;
+using CRAWLER.Models;
+
+namespace CRAWLER.ViewModels;
+
+/// <summary>View model of the edit window: the record draft plus the PDF files queued for attachment.</summary>
+internal sealed class EditInstrumentWindowViewModel : ObservableObject
+{
+    private readonly InstrumentRecord _draft;
+    private PendingPdfFile _selectedPendingPdf;
+
+    public EditInstrumentWindowViewModel(InstrumentRecord draft, bool isNewRecord)
+    {
+        _draft = draft ?? new InstrumentRecord();
+        PendingPdfFiles = new ObservableCollection<PendingPdfFile>();
+        ExistingAttachments = new ObservableCollection<PdfAttachment>(_draft.Attachments ?? Enumerable.Empty<PdfAttachment>());
+        // A blank registry number falls back to the name, same as a missing one.
+        var caption = string.IsNullOrWhiteSpace(_draft.RegistryNumber) ? _draft.Name : _draft.RegistryNumber;
+        WindowTitle = isNewRecord ? "Новая запись" : $"Редактирование: {caption}";
+    }
+
+    public string WindowTitle { get; }
+
+    public ObservableCollection<PendingPdfFile> PendingPdfFiles { get; }
+
+    public ObservableCollection<PdfAttachment> ExistingAttachments { get; }
+
+    public PendingPdfFile SelectedPendingPdf
+    {
+        get { return _selectedPendingPdf; }
+        set { SetProperty(ref _selectedPendingPdf, value); }
+    }
+
+    public InstrumentRecord Draft
+    {
+        get { return _draft; }
+    }
+
+    public void AddPendingFiles(IReadOnlyList<string> paths)
+    {
+        if (paths == null)
+        {
+            return;
+        }
+
+        foreach (var path in paths.Where(path => !string.IsNullOrWhiteSpace(path)))
+        {
+            if (PendingPdfFiles.Any(item => string.Equals(item.SourcePath, path, StringComparison.OrdinalIgnoreCase)))
+            {
+                continue;
+            }
+
+            PendingPdfFiles.Add(new PendingPdfFile
+            {
+                SourcePath = path,
+                DisplayName = System.IO.Path.GetFileName(path)
+            });
+        }
+    }
+
+    public void RemovePendingSelected()
+    {
+        if (SelectedPendingPdf != null)
+        {
+            PendingPdfFiles.Remove(SelectedPendingPdf);
+        }
+    }
+
+    public string[] GetPendingPaths()
+    {
+        return PendingPdfFiles.Select(item => item.SourcePath).ToArray();
+    }
+
+    public bool Validate(out string errorMessage)
+    {
+        if (string.IsNullOrWhiteSpace(Draft.Name))
+        {
+            errorMessage = "Укажите наименование средства измерения.";
+            return false;
+        }
+
+        errorMessage = null;
+        return true;
+    }
+}
diff --git a/ViewModels/MainWindowViewModel.cs b/ViewModels/MainWindowViewModel.cs
new file mode 100644
index 0000000..41dcf5d
--- /dev/null
+++ b/ViewModels/MainWindowViewModel.cs
@@ -0,0 +1,293 @@
+using System;
+using System.Collections.ObjectModel;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+using CRAWLER.Infrastructure;
+using CRAWLER.Models;
+using CRAWLER.Services;
+
+namespace CRAWLER.ViewModels;
+
+/// <summary>View model of the main window: record list, selected record details and the operations on them.</summary>
+internal sealed class MainWindowViewModel : ObservableObject
+{
+    private readonly InstrumentCatalogService _catalogService;
+    private readonly IPdfOpener _pdfOpener;
+    private InstrumentSummary _selectedSummary;
+    private InstrumentRecord _selectedInstrument;
+    private string _searchText;
+    private int _pagesToScan;
+    private string _statusText;
+    private bool _isBusy;
+    private CancellationTokenSource _selectionCancellationTokenSource;
+
+    public MainWindowViewModel(InstrumentCatalogService catalogService, IPdfOpener pdfOpener)
+    {
+        _catalogService = catalogService;
+        _pdfOpener = pdfOpener;
+        _pagesToScan = _catalogService.DefaultPagesToScan;
+        _statusText = "Готово.";
+        Instruments = new ObservableCollection<InstrumentSummary>();
+    }
+
+    public ObservableCollection<InstrumentSummary> Instruments { get; }
+
+    public InstrumentSummary SelectedSummary
+    {
+        get { return _selectedSummary; }
+        set
+        {
+            if (SetProperty(ref _selectedSummary, value))
+            {
+                _ = LoadSelectedInstrumentInBackgroundAsync(value?.Id);
+            }
+        }
+    }
+
+    public InstrumentRecord SelectedInstrument
+    {
+        get { return _selectedInstrument; }
+        private set { SetProperty(ref _selectedInstrument, value); }
+    }
+
+    public string SearchText
+    {
+        get { return _searchText; }
+        set { SetProperty(ref _searchText, value); }
+    }
+
+    public int PagesToScan
+    {
+        get { return _pagesToScan; }
+        set { SetProperty(ref _pagesToScan, value < 1 ? 1 : value); }
+    }
+
+    public string StatusText
+    {
+        get { return _statusText; }
+        private set { SetProperty(ref _statusText, value); }
+    }
+
+    public bool IsBusy
+    {
+        get { return _isBusy; }
+        private set { SetProperty(ref _isBusy, value); }
+    }
+
+    public async Task InitializeAsync()
+    {
+        await RunBusyAsync(async () =>
+        {
+            StatusText = "Подготовка базы данных...";
+            await _catalogService.InitializeAsync(CancellationToken.None);
+            // RefreshAsync would see IsBusy and return without loading, so call its body directly.
+            await RefreshCoreAsync(null);
+        });
+    }
+
+    /// <summary>Reloads the list for the current search text and restores or applies the selection.</summary>
+    public async Task RefreshAsync(long? selectId = null)
+    {
+        await RunBusyAsync(() => RefreshCoreAsync(selectId));
+    }
+
+    // Body of RefreshAsync without the busy guard, for operations that already run inside RunBusyAsync.
+    private async Task RefreshCoreAsync(long? selectId)
+    {
+        StatusText = "Загрузка списка записей...";
+
+        // Remember the selection first: clearing the collection may reset SelectedSummary through data binding.
+        var previousId = SelectedSummary?.Id;
+
+        var items = await _catalogService.SearchAsync(SearchText, CancellationToken.None);
+        Instruments.Clear();
+        foreach (var item in items)
+        {
+            Instruments.Add(item);
+        }
+
+        if (Instruments.Count == 0)
+        {
+            SelectedInstrument = null;
+            SelectedSummary = null;
+            StatusText = "Записи не найдены.";
+            return;
+        }
+
+        var summary = selectId.HasValue
+            ? Instruments.FirstOrDefault(item => item.Id == selectId.Value)
+            : previousId == null
+                ? Instruments.FirstOrDefault()
+                : Instruments.FirstOrDefault(item => item.Id == previousId) ?? Instruments.FirstOrDefault();
+
+        SelectedSummary = summary;
+        StatusText = $"Загружено записей: {Instruments.Count}.";
+    }
+
+    public async Task<SyncResult> SyncAsync()
+    {
+        SyncResult result = null;
+
+        await RunBusyAsync(async () =>
+        {
+            var progress = new Progress<string>(message => StatusText = message);
+            result = await _catalogService.SyncFromSiteAsync(PagesToScan, progress, CancellationToken.None);
+            await RefreshCoreAsync(SelectedSummary?.Id);
+        });
+
+        return result;
+    }
+
+    public InstrumentRecord CreateNewDraft()
+    {
+        return new InstrumentRecord
+        {
+            SourceSystem = "Manual"
+        };
+    }
+
+    public InstrumentRecord CreateDraftFromSelected()
+    {
+        return SelectedInstrument?.Clone();
+    }
+
+    public async Task<long> SaveAsync(InstrumentRecord draft, System.Collections.Generic.IEnumerable<string> pendingPdfPaths)
+    {
+        long id = 0;
+
+        await RunBusyAsync(async () =>
+        {
+            StatusText = "Сохранение записи...";
+            id = await _catalogService.SaveInstrumentAsync(draft, pendingPdfPaths, CancellationToken.None);
+            await RefreshCoreAsync(id);
+            StatusText = "Изменения сохранены.";
+        });
+
+        return id;
+    }
+
+    public async Task DeleteSelectedAsync()
+    {
+        if (SelectedInstrument == null)
+        {
+            return;
+        }
+
+        var deletedId = SelectedInstrument.Id;
+
+        await RunBusyAsync(async () =>
+        {
+            StatusText = "Удаление записи...";
+            await _catalogService.DeleteInstrumentAsync(SelectedInstrument, CancellationToken.None);
+            await RefreshCoreAsync(null);
+            StatusText = $"Запись {deletedId} удалена.";
+        });
+    }
+
+    public async Task AddAttachmentsToSelectedAsync(System.Collections.Generic.IEnumerable<string> paths)
+    {
+        if (SelectedInstrument == null)
+        {
+            return;
+        }
+
+        await RunBusyAsync(async () =>
+        {
+            StatusText = "Копирование PDF-файлов...";
+            await _catalogService.AddManualAttachmentsAsync(SelectedInstrument.Id, SelectedInstrument.RegistryNumber, paths, CancellationToken.None);
+            // The selection may have been cleared while the copy was awaited, hence the null-conditional.
+            await LoadSelectedInstrumentAsync(SelectedInstrument?.Id);
+            StatusText = "PDF-файлы добавлены.";
+        });
+    }
+
+    public async Task RemoveAttachmentAsync(PdfAttachment attachment)
+    {
+        if (attachment == null || SelectedInstrument == null)
+        {
+            return;
+        }
+
+        await RunBusyAsync(async () =>
+        {
+            StatusText = "Удаление PDF-файла...";
+            await _catalogService.RemoveAttachmentAsync(attachment, CancellationToken.None);
+            // The selection may have been cleared while the removal was awaited, hence the null-conditional.
+            await LoadSelectedInstrumentAsync(SelectedInstrument?.Id);
+            StatusText = "PDF-файл удалён.";
+        });
+    }
+
+    public void OpenAttachment(PdfAttachment attachment)
+    {
+        _pdfOpener.OpenAttachment(attachment);
+    }
+
+    public void OpenSourceUrl()
+    {
+        if (SelectedInstrument != null && !string.IsNullOrWhiteSpace(SelectedInstrument.DetailUrl))
+        {
+            _pdfOpener.OpenUri(SelectedInstrument.DetailUrl);
+        }
+    }
+
+    // Used by the SelectedSummary setter, which cannot await: failures are shown in the status line
+    // instead of being left on an unobserved task.
+    private async Task LoadSelectedInstrumentInBackgroundAsync(long? id)
+    {
+        try
+        {
+            await LoadSelectedInstrumentAsync(id);
+        }
+        catch (Exception ex)
+        {
+            StatusText = ex.Message;
+        }
+    }
+
+    private async Task LoadSelectedInstrumentAsync(long? id)
+    {
+        _selectionCancellationTokenSource?.Cancel();
+        _selectionCancellationTokenSource = new CancellationTokenSource();
+        var token = _selectionCancellationTokenSource.Token;
+
+        if (!id.HasValue)
+        {
+            SelectedInstrument = null;
+            return;
+        }
+
+        try
+        {
+            var instrument = await _catalogService.GetByIdAsync(id.Value, token);
+            if (!token.IsCancellationRequested)
+            {
+                SelectedInstrument = instrument;
+            }
+        }
+        catch (OperationCanceledException)
+        {
+            // Superseded by a newer selection: nothing to show.
+        }
+    }
+
+    // Runs the action with IsBusy set; a call made while IsBusy is already true returns without running it.
+    private async Task RunBusyAsync(Func<Task> action)
+    {
+        if (IsBusy)
+        {
+            return;
+        }
+
+        try
+        {
+            IsBusy = true;
+            await action();
+        }
+        finally
+        {
+            IsBusy = false;
+        }
+    }
+}
diff --git a/appsettings.json b/appsettings.json
new file mode 100644
index 0000000..7485248
--- /dev/null
+++ b/appsettings.json
@@ -0,0 +1,27 @@
+{
+  "Database": {
+    "ApplicationName": "CRAWLER",
+    "CommandTimeoutSeconds": 60,
+    "ConnectRetryCount": 3,
+    "ConnectRetryIntervalSeconds": 5,
+    "ConnectTimeoutSeconds": 15,
+    "Database": "CRAWLER",
+    "Encrypt": false,
+    "IntegratedSecurity": true,
+    "MultipleActiveResultSets": true,
+    "Pooling": true,
+    "MaxPoolSize": 100,
+    "MinPoolSize": 0,
+    "Server": "SEVENHILL\\SQLEXPRESS",
+    "TrustServerCertificate": true
+  },
+  "Crawler": {
+    "BaseUrl": "https://www.ktopoverit.ru",
+    "CatalogPathFormat": "/poverka/gosreestr_sredstv_izmereniy?page={0}",
+    "RequestDelayMilliseconds": 350,
+    "DefaultPagesToScan": 1,
+    "PdfStoragePath": "%LOCALAPPDATA%\\CRAWLER\\PdfStore",
+    "TimeoutSeconds": 30,
+    "UserAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) CRAWLER/1.0"
+  }
+}
№ в госреестре
Наименование
Тип
Описание типа
Методики поверки
МПИ
Свидетельство / завод. номер
Производитель