using System.Globalization;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using CRAWLER.Configuration;
using Microsoft.Extensions.Configuration;

namespace CRAWLER.Services;

/// <summary>
/// HTTP client for the site described by the "Crawler" configuration section.
/// Automatic redirects are disabled on the handler; redirects are followed manually
/// by <see cref="SendAsync"/> so that malformed Location headers can be repaired.
/// </summary>
internal sealed class KtoPoveritClient : IDisposable
{
    private readonly CrawlerOptions _options;
    private readonly HttpClient _httpClient;

    /// <summary>
    /// Reads <see cref="CrawlerOptions"/> from the "Crawler" section and configures the underlying
    /// <see cref="HttpClient"/> (decompression, timeout, User-Agent, Accept-Language).
    /// </summary>
    /// <param name="configuration">Application configuration containing the "Crawler" section.</param>
    /// <exception cref="ArgumentNullException"><paramref name="configuration"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The "Crawler" section could not be bound.</exception>
    public KtoPoveritClient(IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(configuration);

        _options = configuration.GetSection("Crawler").Get<CrawlerOptions>()
            ?? throw new InvalidOperationException("Раздел Crawler не найден в appsettings.json.");

        var handler = new SocketsHttpHandler
        {
            AutomaticDecompression = DecompressionMethods.All,
            // Redirects are handled by SendAsync instead of the handler.
            AllowAutoRedirect = false
        };

        _httpClient = new HttpClient(handler)
        {
            // The configured timeout is never allowed to drop below 5 seconds.
            Timeout = TimeSpan.FromSeconds(Math.Max(5, _options.TimeoutSeconds))
        };

        _httpClient.DefaultRequestHeaders.UserAgent.ParseAdd(_options.UserAgent);
        _httpClient.DefaultRequestHeaders.AcceptLanguage.ParseAdd("ru-RU,ru;q=0.9,en-US;q=0.8");
    }

    /// <summary>Options bound from the "Crawler" configuration section.</summary>
    public CrawlerOptions Options => _options;

    /// <summary>Downloads <paramref name="url"/> and returns the response body as text.</summary>
    /// <param name="url">Address to request with GET.</param>
    /// <param name="cancellationToken">Token passed to the HTTP call and the body read.</param>
    /// <returns>The response body as a string.</returns>
    /// <exception cref="InvalidOperationException">The request failed (see <see cref="SendAsync"/>).</exception>
    public async Task<string> GetStringAsync(string url, CancellationToken cancellationToken)
    {
        using var request = CreateRequest(url);
        using var response = await SendAsync(request, cancellationToken);
        return await response.Content.ReadAsStringAsync(cancellationToken);
    }

    /// <summary>Downloads <paramref name="url"/> and returns the response body as raw bytes.</summary>
    /// <param name="url">Address to request with GET.</param>
    /// <param name="cancellationToken">Token passed to the HTTP call and the body read.</param>
    /// <returns>The response body as a byte array.</returns>
    /// <exception cref="InvalidOperationException">The request failed (see <see cref="SendAsync"/>).</exception>
    public async Task<byte[]> GetBytesAsync(string url, CancellationToken cancellationToken)
    {
        using var request = CreateRequest(url);
        using var response = await SendAsync(request, cancellationToken);
        return await response.Content.ReadAsByteArrayAsync(cancellationToken);
    }

    /// <summary>
    /// Builds the absolute URL of a catalog page by substituting <paramref name="page"/> into
    /// <c>CatalogPathFormat</c> and resolving the result with <see cref="BuildAbsoluteUrl"/>.
    /// </summary>
    /// <param name="page">Page number inserted into the format string.</param>
    /// <returns>The absolute catalog page URL.</returns>
    public string BuildCatalogPageUrl(int page)
    {
        // URLs are machine-readable: format the number independently of the current culture.
        var relative = string.Format(CultureInfo.InvariantCulture, _options.CatalogPathFormat, page);
        return BuildAbsoluteUrl(relative);
    }

    /// <summary>
    /// Returns <paramref name="urlOrPath"/> unchanged (normalized) when it is already an absolute URI,
    /// otherwise resolves it against <c>BaseUrl</c>.
    /// </summary>
    /// <param name="urlOrPath">Absolute URL or a path relative to the configured base URL.</param>
    /// <returns>The absolute URL, or null when the input is null, empty or whitespace.</returns>
    public string BuildAbsoluteUrl(string urlOrPath)
    {
        if (string.IsNullOrWhiteSpace(urlOrPath))
        {
            return null;
        }

        // A root-relative path must not go through the absolute check: on Unix-like systems
        // Uri.TryCreate(..., UriKind.Absolute) accepts "/path" as an implicit file:// URI.
        if (!IsRootRelativePath(urlOrPath)
            && Uri.TryCreate(urlOrPath, UriKind.Absolute, out var absoluteUri))
        {
            return absoluteUri.ToString();
        }

        // Exactly one slash between base and path so that the path is appended to the base.
        var baseUri = new Uri(_options.BaseUrl.TrimEnd('/') + "/");
        return new Uri(baseUri, urlOrPath.TrimStart('/')).ToString();
    }

    /// <summary>Creates a GET request pinned to HTTP/1.1 (or lower) for <paramref name="url"/>.</summary>
    private HttpRequestMessage CreateRequest(string url)
    {
        return new HttpRequestMessage(HttpMethod.Get, url)
        {
            Version = HttpVersion.Version11,
            VersionPolicy = HttpVersionPolicy.RequestVersionOrLower
        };
    }

    /// <summary>
    /// Sends a GET to the URI of <paramref name="request"/>, following up to 10 redirects manually.
    /// Only the request's URI is used; a fresh request message is created for every hop.
    /// </summary>
    /// <param name="request">Request whose <c>RequestUri</c> is the starting address.</param>
    /// <param name="cancellationToken">Token passed to every HTTP call.</param>
    /// <returns>The first 2xx response; the caller owns it and must dispose it.</returns>
    /// <exception cref="InvalidOperationException">
    /// The request has no URI, or any failure occurred while fetching (non-success status,
    /// unusable redirect target, too many redirects, transport error); the original exception
    /// is preserved as the inner exception.
    /// </exception>
    private async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        var currentUri = request.RequestUri ?? throw new InvalidOperationException("Не задан URL запроса.");
        const int maxRedirects = 10;

        try
        {
            for (var redirectIndex = 0; redirectIndex <= maxRedirects; redirectIndex++)
            {
                using var currentRequest = CreateRequest(currentUri.ToString());
                var response = await _httpClient.SendAsync(
                    currentRequest,
                    HttpCompletionOption.ResponseContentRead,
                    cancellationToken);

                // Every response that is not handed to the caller is disposed in the finally block,
                // including when redirect resolution throws.
                var handedToCaller = false;
                try
                {
                    var statusCode = (int)response.StatusCode;

                    if (IsRedirectStatusCode(response.StatusCode))
                    {
                        var redirectUri = ResolveRedirectUri(currentUri, response.Headers);
                        if (redirectUri == null)
                        {
                            throw new InvalidOperationException(
                                $"Сайт вернул {statusCode} для {currentUri}, но не прислал корректный адрес перенаправления.");
                        }

                        currentUri = redirectUri;
                        continue;
                    }

                    if (response.IsSuccessStatusCode)
                    {
                        handedToCaller = true;
                        return response;
                    }

                    throw new HttpRequestException(
                        $"Response status code does not indicate success: {statusCode} ({response.ReasonPhrase}).");
                }
                finally
                {
                    if (!handedToCaller)
                    {
                        response.Dispose();
                    }
                }
            }

            throw new InvalidOperationException(
                $"Превышено число перенаправлений ({maxRedirects}) для {currentUri}.");
        }
        catch (Exception ex)
        {
            // NOTE(review): this also wraps caller-requested cancellation, so callers observe
            // InvalidOperationException instead of OperationCanceledException. Kept as-is to avoid
            // changing the exception types existing callers see; confirm whether that is intended.
            throw new InvalidOperationException(
                $"Не удалось получить данные с сайта Кто поверит: {request.RequestUri}. {ex.Message}", ex);
        }
    }

    /// <summary>Returns true for the redirect status codes 301, 302, 303, 307 and 308.</summary>
    private static bool IsRedirectStatusCode(HttpStatusCode statusCode)
    {
        return statusCode is HttpStatusCode.Moved
            or HttpStatusCode.Redirect
            or HttpStatusCode.RedirectMethod
            or HttpStatusCode.TemporaryRedirect
            or HttpStatusCode.PermanentRedirect;
    }

    /// <summary>
    /// Returns true when <paramref name="value"/> starts with exactly one '/', i.e. it is a path
    /// relative to the site root rather than a candidate absolute URI.
    /// </summary>
    private static bool IsRootRelativePath(string value)
    {
        return value.StartsWith('/') && !value.StartsWith("//", StringComparison.Ordinal);
    }

    /// <summary>
    /// Determines the redirect target from the response headers, resolving relative targets
    /// against <paramref name="currentUri"/>.
    /// </summary>
    /// <param name="currentUri">Address that produced the redirect response.</param>
    /// <param name="headers">Headers of the redirect response.</param>
    /// <returns>The absolute redirect target, or null when no usable Location value is present.</returns>
    private static Uri ResolveRedirectUri(Uri currentUri, HttpResponseHeaders headers)
    {
        // Preferred path: the framework parsed the Location header successfully.
        var location = headers.Location;
        if (location != null)
        {
            return location.IsAbsoluteUri ? location : new Uri(currentUri, location);
        }

        // Fallback: work from the raw header text when the typed property yields nothing.
        if (!headers.TryGetValues("Location", out var values))
        {
            return null;
        }

        var rawLocation = values.FirstOrDefault();
        if (string.IsNullOrWhiteSpace(rawLocation))
        {
            return null;
        }

        // Root-relative values skip the absolute check; on Unix-like systems they would
        // otherwise be accepted as implicit file:// URIs.
        if (!IsRootRelativePath(rawLocation)
            && Uri.TryCreate(rawLocation, UriKind.Absolute, out var absoluteUri))
        {
            return absoluteUri;
        }

        if (Uri.TryCreate(currentUri, rawLocation, out var relativeUri))
        {
            return relativeUri;
        }

        // Last resort: escape the raw value and retry both interpretations.
        // Uri.EscapeUriString is marked obsolete in current .NET (SYSLIB0013); it is kept here
        // because it leaves reserved URI characters intact, unlike Uri.EscapeDataString.
        var escaped = Uri.EscapeUriString(rawLocation);
        if (!IsRootRelativePath(escaped)
            && Uri.TryCreate(escaped, UriKind.Absolute, out absoluteUri))
        {
            return absoluteUri;
        }

        return Uri.TryCreate(currentUri, escaped, out relativeUri) ? relativeUri : null;
    }

    /// <summary>Disposes the underlying <see cref="HttpClient"/>.</summary>
    public void Dispose()
    {
        _httpClient.Dispose();
    }
}