Files
CROWLER/Parsing/DetailPageParser.cs
2026-04-04 10:52:30 +03:00

66 lines
3.2 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using System;
using System.Collections.Generic;
using CRAWLER.Models;
using HtmlAgilityPack;
namespace CRAWLER.Parsing;
/// <summary>
/// Reads instrument details out of the HTML of a detail page.
/// </summary>
internal sealed class DetailPageParser
{
    /// <summary>
    /// Parses detail page markup into a <see cref="ParsedInstrumentDetails"/>.
    /// </summary>
    /// <param name="html">Page markup; <c>null</c> is loaded as an empty document.</param>
    /// <param name="baseUrl">Base URL handed to <c>HtmlParsingHelpers.MakeAbsoluteUrl</c> together with each PDF link.</param>
    /// <returns>
    /// The populated details object. A property whose label was not found among the table rows
    /// receives <c>null</c>.
    /// </returns>
    public ParsedInstrumentDetails Parse(string html, string baseUrl)
    {
        var page = new HtmlDocument();
        page.LoadHtml(html ?? string.Empty);

        var fields = ReadLabelledRows(page);

        // Looks up a collected row by its label; yields null when the label is absent.
        string Field(string label)
        {
            if (fields.TryGetValue(label, out var text))
            {
                return text;
            }

            return null;
        }

        return new ParsedInstrumentDetails
        {
            RegistryNumber = Field("Номер в госреестре"),
            Name = Field("Наименование"),
            TypeDesignation = Field("Обозначение типа"),
            Manufacturer = Field("Производитель"),
            VerificationInterval = Field("Межповерочный интервал (МПИ)"),
            CertificateOrSerialNumber = Field("Срок свидетельства или заводской номер"),
            AllowsBatchVerification = Field("Допускается поверка партии"),
            HasPeriodicVerification = Field("Наличие периодической поверки"),
            TypeInfo = Field("Сведения о типе"),
            Purpose = Field("Назначение"),
            Description = Field("Описание"),
            Software = Field("Программное обеспечение"),
            MetrologicalCharacteristics = Field("Метрологические и технические характеристики"),
            Completeness = Field("Комплектность"),
            Verification = Field("Поверка"),
            RegulatoryDocuments = Field("Нормативные и технические документы"),
            Applicant = Field("Заявитель"),
            TestCenter = Field("Испытательный центр"),
            DescriptionTypePdfUrl = HtmlParsingHelpers.MakeAbsoluteUrl(
                baseUrl,
                FindHref(page, "//a[contains(@href,'/prof/opisanie/')]")),
            MethodologyPdfUrl = HtmlParsingHelpers.MakeAbsoluteUrl(
                baseUrl,
                FindHref(page, "//a[contains(@href,'/prof/metodiki/')]"))
        };
    }

    /// <summary>
    /// Collects label/value pairs from the rows of every table whose class attribute
    /// contains <c>resulttable1</c>.
    /// </summary>
    /// <param name="page">The loaded HTML document.</param>
    /// <returns>
    /// A dictionary with case-insensitive (ordinal) keys. The first <c>td</c> of a row supplies
    /// the label and the second supplies the value; when a label repeats, the later row wins.
    /// </returns>
    private static Dictionary<string, string> ReadLabelledRows(HtmlDocument page)
    {
        var tableRows = page.DocumentNode.SelectNodes("//table[contains(@class,'resulttable1')]//tr");
        var fields = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

        // A null collection is treated as "no rows": the result stays empty.
        if (tableRows == null)
        {
            return fields;
        }

        foreach (var tableRow in tableRows)
        {
            var cells = tableRow.SelectNodes("./td");

            // Only rows with at least a label cell and a value cell are of interest.
            if (cells != null && cells.Count >= 2)
            {
                var label = HtmlParsingHelpers.NormalizeLabel(cells[0].InnerText);
                var text = HtmlParsingHelpers.NormalizeWhitespace(cells[1].InnerText);

                if (string.IsNullOrWhiteSpace(label))
                {
                    continue;
                }

                fields[label] = text;
            }
        }

        return fields;
    }

    /// <summary>
    /// Returns the <c>href</c> attribute of the first node matching <paramref name="xpath"/>.
    /// </summary>
    /// <param name="page">The loaded HTML document.</param>
    /// <param name="xpath">XPath expression selecting the anchor.</param>
    /// <returns>The attribute value, or <c>null</c> when no node matches or the attribute is missing.</returns>
    private static string FindHref(HtmlDocument page, string xpath)
    {
        var anchor = page.DocumentNode.SelectSingleNode(xpath);
        return anchor?.GetAttributeValue("href", null);
    }
}