Files
CROWLER/Services/KtoPoveritClient.cs
2026-04-04 10:52:30 +03:00

188 lines
6.3 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using CRAWLER.Configuration;
using Microsoft.Extensions.Configuration;
namespace CRAWLER.Services;
/// <summary>
/// HTTP client used by the crawler: downloads pages and binary payloads, follows
/// redirects manually (automatic redirects are disabled on the handler) and builds
/// absolute URLs from the configured base URL.
/// </summary>
internal sealed class KtoPoveritClient : IDisposable
{
    /// <summary>Upper bound on the number of redirects followed for a single request.</summary>
    private const int MaxRedirects = 10;

    private readonly CrawlerOptions _options;
    private readonly HttpClient _httpClient;

    /// <summary>
    /// Reads the <c>Crawler</c> configuration section and prepares the underlying <see cref="HttpClient"/>.
    /// </summary>
    /// <param name="configuration">Application configuration that contains the <c>Crawler</c> section.</param>
    /// <exception cref="InvalidOperationException">The <c>Crawler</c> section could not be bound.</exception>
    public KtoPoveritClient(IConfiguration configuration)
    {
        _options = configuration.GetSection("Crawler").Get<CrawlerOptions>()
            ?? throw new InvalidOperationException("Раздел Crawler не найден в appsettings.json.");

        // Redirects are handled by SendAsync, so the handler must not follow them itself.
        var handler = new SocketsHttpHandler
        {
            AutomaticDecompression = DecompressionMethods.All,
            AllowAutoRedirect = false
        };

        _httpClient = new HttpClient(handler)
        {
            // The configured timeout is never allowed to drop below five seconds.
            Timeout = TimeSpan.FromSeconds(Math.Max(5, _options.TimeoutSeconds))
        };
        _httpClient.DefaultRequestHeaders.UserAgent.ParseAdd(_options.UserAgent);
        _httpClient.DefaultRequestHeaders.AcceptLanguage.ParseAdd("ru-RU,ru;q=0.9,en-US;q=0.8");
    }

    /// <summary>Crawler options bound from configuration in the constructor.</summary>
    public CrawlerOptions Options => _options;

    /// <summary>Downloads <paramref name="url"/> and returns the response body as text.</summary>
    /// <param name="url">Address to request.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <exception cref="InvalidOperationException">The request failed or returned a non-success status.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled while sending.</exception>
    public async Task<string> GetStringAsync(string url, CancellationToken cancellationToken)
    {
        using var request = CreateRequest(url);
        using var response = await SendAsync(request, cancellationToken);
        return await response.Content.ReadAsStringAsync(cancellationToken);
    }

    /// <summary>Downloads <paramref name="url"/> and returns the response body as raw bytes.</summary>
    /// <param name="url">Address to request.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <exception cref="InvalidOperationException">The request failed or returned a non-success status.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled while sending.</exception>
    public async Task<byte[]> GetBytesAsync(string url, CancellationToken cancellationToken)
    {
        using var request = CreateRequest(url);
        using var response = await SendAsync(request, cancellationToken);
        return await response.Content.ReadAsByteArrayAsync(cancellationToken);
    }

    /// <summary>
    /// Builds the absolute URL of a catalog page by substituting <paramref name="page"/>
    /// into <c>CatalogPathFormat</c> and resolving the result against the base URL.
    /// </summary>
    /// <param name="page">Page number inserted into the format string.</param>
    public string BuildCatalogPageUrl(int page)
    {
        // URLs are machine-readable, so the number must not depend on the current culture.
        var relative = string.Format(
            System.Globalization.CultureInfo.InvariantCulture,
            _options.CatalogPathFormat,
            page);
        return BuildAbsoluteUrl(relative);
    }

    /// <summary>
    /// Returns <paramref name="urlOrPath"/> unchanged (normalized) when it is already an absolute URL,
    /// otherwise resolves it against the configured <c>BaseUrl</c>.
    /// </summary>
    /// <param name="urlOrPath">Absolute URL or a path relative to the base URL.</param>
    /// <returns>The absolute URL, or <c>null</c> when the input is null, empty or whitespace.</returns>
    public string BuildAbsoluteUrl(string urlOrPath)
    {
        if (string.IsNullOrWhiteSpace(urlOrPath))
        {
            return null;
        }

        if (Uri.TryCreate(urlOrPath, UriKind.Absolute, out var absoluteUri)
            && !IsRootedPathParsedAsFile(urlOrPath, absoluteUri))
        {
            return absoluteUri.ToString();
        }

        // Normalize to exactly one slash between base and path so the path is appended
        // to the base URL instead of replacing its last segment.
        var baseUri = new Uri(_options.BaseUrl.TrimEnd('/') + "/");
        return new Uri(baseUri, urlOrPath.TrimStart('/')).ToString();
    }

    /// <summary>Creates a GET request pinned to HTTP/1.1 (or lower).</summary>
    private HttpRequestMessage CreateRequest(string url)
    {
        return new HttpRequestMessage(HttpMethod.Get, url)
        {
            Version = HttpVersion.Version11,
            VersionPolicy = HttpVersionPolicy.RequestVersionOrLower
        };
    }

    /// <summary>
    /// Sends a GET request to the URI of <paramref name="request"/>, following up to
    /// <see cref="MaxRedirects"/> redirects, and returns the first 2xx response.
    /// </summary>
    /// <param name="request">Request whose <c>RequestUri</c> is the starting address; a fresh message is created per hop.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <returns>The successful response; the caller owns it and must dispose it.</returns>
    /// <exception cref="InvalidOperationException">
    /// The request has no URI, or any failure occurred while sending (the original error is the inner exception).
    /// </exception>
    /// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
    private async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        var currentUri = request.RequestUri ?? throw new InvalidOperationException("Не задан URL запроса.");

        try
        {
            // Index 0 is the initial request, so MaxRedirects redirects are allowed after it.
            for (var redirectIndex = 0; redirectIndex <= MaxRedirects; redirectIndex++)
            {
                using var currentRequest = CreateRequest(currentUri.ToString());
                var response = await _httpClient.SendAsync(
                    currentRequest,
                    HttpCompletionOption.ResponseContentRead,
                    cancellationToken);

                if (response.IsSuccessStatusCode)
                {
                    return response;
                }

                // Every other outcome abandons this response; the using guarantees it is
                // released even when resolving the redirect target throws.
                using (response)
                {
                    var statusCode = (int)response.StatusCode;

                    if (!IsRedirectStatusCode(response.StatusCode))
                    {
                        throw new HttpRequestException(
                            $"Response status code does not indicate success: {statusCode} ({response.ReasonPhrase}).",
                            null,
                            response.StatusCode);
                    }

                    // currentUri on the right-hand side is still the address that produced the redirect.
                    currentUri = ResolveRedirectUri(currentUri, response.Headers)
                        ?? throw new InvalidOperationException(
                            $"Сайт вернул {statusCode} для {currentUri}, но не прислал корректный адрес перенаправления.");
                }
            }

            throw new InvalidOperationException(
                $"Превышено число перенаправлений ({MaxRedirects}) для {currentUri}.");
        }
        // Cancellation requested by the caller must surface as OperationCanceledException;
        // timeouts and all other failures are still wrapped with the request address.
        catch (Exception ex) when (!(ex is OperationCanceledException && cancellationToken.IsCancellationRequested))
        {
            throw new InvalidOperationException(
                $"Не удалось получить данные с сайта Кто поверит: {request.RequestUri}. {ex.Message}",
                ex);
        }
    }

    /// <summary>Returns <c>true</c> for the status codes treated as redirects: 301, 302, 303, 307 and 308.</summary>
    private static bool IsRedirectStatusCode(HttpStatusCode statusCode)
    {
        return statusCode == HttpStatusCode.Moved
            || statusCode == HttpStatusCode.Redirect
            || statusCode == HttpStatusCode.RedirectMethod
            || statusCode == HttpStatusCode.TemporaryRedirect
            || (int)statusCode == 308;
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="original"/> is a path starting with <c>/</c> that
    /// was parsed as an absolute <c>file</c> URI (this happens on Unix-like systems). Such a value
    /// is a site-relative path, not an absolute URL.
    /// </summary>
    private static bool IsRootedPathParsedAsFile(string original, Uri parsed)
    {
        return parsed.IsFile && original.StartsWith('/');
    }

    /// <summary>
    /// Determines the redirect target from the <c>Location</c> header, resolving relative
    /// values against <paramref name="currentUri"/>.
    /// </summary>
    /// <param name="currentUri">Address of the request that produced the redirect.</param>
    /// <param name="headers">Headers of the redirect response.</param>
    /// <returns>The absolute target URI, or <c>null</c> when the header is missing or cannot be parsed.</returns>
    private static Uri ResolveRedirectUri(Uri currentUri, HttpResponseHeaders headers)
    {
        if (headers.Location != null)
        {
            return headers.Location.IsAbsoluteUri
                ? headers.Location
                : new Uri(currentUri, headers.Location);
        }

        // The typed property was empty; fall back to the raw header text.
        if (!headers.TryGetValues("Location", out var values))
        {
            return null;
        }

        var rawLocation = values.FirstOrDefault();
        if (string.IsNullOrWhiteSpace(rawLocation))
        {
            return null;
        }

        if (Uri.TryCreate(rawLocation, UriKind.Absolute, out var absoluteUri)
            && !IsRootedPathParsedAsFile(rawLocation, absoluteUri))
        {
            return absoluteUri;
        }

        if (Uri.TryCreate(currentUri, rawLocation, out var relativeUri))
        {
            return relativeUri;
        }

        // Last resort: percent-encode the raw value and try again. EscapeUriString is obsolete,
        // but it is the only built-in call that escapes a whole URI while keeping its reserved
        // characters, so it is kept here deliberately.
#pragma warning disable SYSLIB0013
        var escaped = Uri.EscapeUriString(rawLocation);
#pragma warning restore SYSLIB0013

        if (Uri.TryCreate(escaped, UriKind.Absolute, out absoluteUri)
            && !IsRootedPathParsedAsFile(escaped, absoluteUri))
        {
            return absoluteUri;
        }

        return Uri.TryCreate(currentUri, escaped, out relativeUri)
            ? relativeUri
            : null;
    }

    /// <summary>Releases the underlying <see cref="HttpClient"/>.</summary>
    public void Dispose()
    {
        _httpClient.Dispose();
    }
}