188 lines
6.3 KiB
C#
188 lines
6.3 KiB
C#
using System.Net;
|
||
using System.Net.Http;
|
||
using System.Net.Http.Headers;
|
||
using CRAWLER.Configuration;
|
||
using Microsoft.Extensions.Configuration;
|
||
|
||
namespace CRAWLER.Services;
|
||
|
||
/// <summary>
/// HTTP client for the crawled site. Reads its settings from the <c>Crawler</c> configuration
/// section, issues HTTP/1.1 GET requests and follows redirects manually, because automatic
/// redirects are switched off on the underlying handler.
/// </summary>
internal sealed class KtoPoveritClient : IDisposable
{
    private readonly CrawlerOptions _options;
    private readonly HttpClient _httpClient;

    /// <summary>
    /// Binds <see cref="CrawlerOptions"/> from the <c>Crawler</c> section and configures the HTTP client.
    /// </summary>
    /// <param name="configuration">Application configuration that contains the <c>Crawler</c> section.</param>
    /// <exception cref="InvalidOperationException">The <c>Crawler</c> section could not be bound.</exception>
    public KtoPoveritClient(IConfiguration configuration)
    {
        _options = configuration.GetSection("Crawler").Get<CrawlerOptions>()
            ?? throw new InvalidOperationException("Раздел Crawler не найден в appsettings.json.");

        var handler = new SocketsHttpHandler
        {
            AutomaticDecompression = DecompressionMethods.All,
            // Redirects are followed by SendAsync below instead of by the handler.
            AllowAutoRedirect = false
        };

        _httpClient = new HttpClient(handler)
        {
            // The configured timeout is never allowed to drop below 5 seconds.
            Timeout = TimeSpan.FromSeconds(Math.Max(5, _options.TimeoutSeconds))
        };
        _httpClient.DefaultRequestHeaders.UserAgent.ParseAdd(_options.UserAgent);
        _httpClient.DefaultRequestHeaders.AcceptLanguage.ParseAdd("ru-RU,ru;q=0.9,en-US;q=0.8");
    }

    /// <summary>Crawler options bound in the constructor.</summary>
    public CrawlerOptions Options => _options;

    /// <summary>Downloads <paramref name="url"/> and returns the response body as text.</summary>
    /// <param name="url">Address to request.</param>
    /// <param name="cancellationToken">Token that cancels the request and the body read.</param>
    /// <returns>The response body decoded as a string.</returns>
    /// <exception cref="InvalidOperationException">The request failed; the cause is in <c>InnerException</c>.</exception>
    public async Task<string> GetStringAsync(string url, CancellationToken cancellationToken)
    {
        using var request = CreateRequest(url);
        using var response = await SendAsync(request, cancellationToken);
        return await response.Content.ReadAsStringAsync(cancellationToken);
    }

    /// <summary>Downloads <paramref name="url"/> and returns the raw response body.</summary>
    /// <param name="url">Address to request.</param>
    /// <param name="cancellationToken">Token that cancels the request and the body read.</param>
    /// <returns>The response body as a byte array.</returns>
    /// <exception cref="InvalidOperationException">The request failed; the cause is in <c>InnerException</c>.</exception>
    public async Task<byte[]> GetBytesAsync(string url, CancellationToken cancellationToken)
    {
        using var request = CreateRequest(url);
        using var response = await SendAsync(request, cancellationToken);
        return await response.Content.ReadAsByteArrayAsync(cancellationToken);
    }

    /// <summary>
    /// Builds the absolute URL of a catalog page by substituting <paramref name="page"/> into
    /// <c>CatalogPathFormat</c> and resolving the result against the base URL.
    /// </summary>
    /// <param name="page">Page number inserted into the format string.</param>
    /// <returns>The absolute URL of the catalog page.</returns>
    public string BuildCatalogPageUrl(int page)
    {
        // URLs are machine-readable, so the number must not depend on the current culture.
        var relative = string.Format(
            System.Globalization.CultureInfo.InvariantCulture,
            _options.CatalogPathFormat,
            page);
        return BuildAbsoluteUrl(relative);
    }

    /// <summary>
    /// Converts a URL or a site-relative path into an absolute URL string.
    /// </summary>
    /// <param name="urlOrPath">Absolute URL, protocol-relative URL ("//host/path") or path on the site.</param>
    /// <returns>
    /// The absolute URL, or <c>null</c> when <paramref name="urlOrPath"/> is null, empty or whitespace.
    /// </returns>
    public string BuildAbsoluteUrl(string urlOrPath)
    {
        if (string.IsNullOrWhiteSpace(urlOrPath))
        {
            return null;
        }

        // A leading '/' means "relative to the site". It must not go through the absolute-URI
        // check: on Unix-like systems "/path" parses as an implicit file:// URI, and "//host/x"
        // can parse as a UNC path, so the base URL would never be applied.
        var startsWithSlash = urlOrPath[0] == '/';

        if (!startsWithSlash && Uri.TryCreate(urlOrPath, UriKind.Absolute, out var absoluteUri))
        {
            return absoluteUri.ToString();
        }

        var baseUri = new Uri(_options.BaseUrl.TrimEnd('/') + "/");

        // Protocol-relative reference ("//host/path"): keep the host, inherit the base scheme.
        var isProtocolRelative = urlOrPath.Length > 2 && urlOrPath[1] == '/' && urlOrPath[2] != '/';
        if (isProtocolRelative)
        {
            return new Uri(baseUri, urlOrPath).ToString();
        }

        // Leading slashes are trimmed so the path stays under any path prefix of the base URL.
        return new Uri(baseUri, urlOrPath.TrimStart('/')).ToString();
    }

    /// <summary>Creates a GET request pinned to HTTP/1.1 (or lower).</summary>
    private HttpRequestMessage CreateRequest(string url)
    {
        return new HttpRequestMessage(HttpMethod.Get, url)
        {
            Version = HttpVersion.Version11,
            VersionPolicy = HttpVersionPolicy.RequestVersionOrLower
        };
    }

    /// <summary>
    /// Sends a GET request to the URI of <paramref name="request"/>, following up to 10 redirects by hand.
    /// Only the URI of <paramref name="request"/> is used; a fresh request is created for every hop.
    /// </summary>
    /// <param name="request">Request whose <c>RequestUri</c> is the starting address.</param>
    /// <param name="cancellationToken">Token that cancels the in-flight request.</param>
    /// <returns>A 2xx response; the caller owns it and must dispose it.</returns>
    /// <exception cref="InvalidOperationException">
    /// The request has no URI, or the download failed for any reason (the cause is in <c>InnerException</c>).
    /// </exception>
    private async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        var currentUri = request.RequestUri ?? throw new InvalidOperationException("Не задан URL запроса.");
        const int maxRedirects = 10;

        try
        {
            // The first iteration is the original request, so up to maxRedirects redirects are followed.
            for (var redirectIndex = 0; redirectIndex <= maxRedirects; redirectIndex++)
            {
                using var currentRequest = CreateRequest(currentUri.ToString());
                var response = await _httpClient.SendAsync(currentRequest, HttpCompletionOption.ResponseContentRead, cancellationToken);
                var handedToCaller = false;

                try
                {
                    // Captured up front so nothing below has to touch a disposed response.
                    var statusCode = response.StatusCode;

                    if (IsRedirectStatusCode(statusCode))
                    {
                        var redirectUri = ResolveRedirectUri(currentUri, response.Headers);
                        if (redirectUri == null)
                        {
                            throw new InvalidOperationException(
                                $"Сайт вернул {(int)statusCode} для {currentUri}, но не прислал корректный адрес перенаправления.");
                        }

                        currentUri = redirectUri;
                        continue;
                    }

                    if (response.IsSuccessStatusCode)
                    {
                        handedToCaller = true;
                        return response;
                    }

                    // Same message as before; the status code is now also exposed on the exception.
                    throw new HttpRequestException(
                        $"Response status code does not indicate success: {(int)statusCode} ({response.ReasonPhrase}).",
                        null,
                        statusCode);
                }
                finally
                {
                    // Every path except the successful hand-off releases the response,
                    // including exceptions thrown while resolving the redirect target.
                    if (!handedToCaller)
                    {
                        response.Dispose();
                    }
                }
            }

            throw new InvalidOperationException(
                $"Превышено число перенаправлений ({maxRedirects}) для {currentUri}.");
        }
        catch (Exception ex)
        {
            // NOTE(review): this also wraps OperationCanceledException, so callers observe
            // cancellation and timeouts as InvalidOperationException. Kept as-is for
            // backward compatibility with existing callers.
            throw new InvalidOperationException(
                $"Не удалось получить данные с сайта Кто поверит: {request.RequestUri}. {ex.Message}",
                ex);
        }
    }

    /// <summary>Returns <c>true</c> for the redirect statuses handled here: 301, 302, 303, 307 and 308.</summary>
    private static bool IsRedirectStatusCode(HttpStatusCode statusCode)
    {
        return statusCode == HttpStatusCode.Moved
            || statusCode == HttpStatusCode.Redirect
            || statusCode == HttpStatusCode.RedirectMethod
            || statusCode == HttpStatusCode.TemporaryRedirect
            || (int)statusCode == 308;
    }

    /// <summary>
    /// Determines the redirect target from the <c>Location</c> header of a response.
    /// </summary>
    /// <param name="currentUri">URI of the request that produced the redirect; base for relative targets.</param>
    /// <param name="headers">Headers of the redirect response.</param>
    /// <returns>The absolute target URI, or <c>null</c> when no usable <c>Location</c> value is present.</returns>
    private static Uri ResolveRedirectUri(Uri currentUri, HttpResponseHeaders headers)
    {
        // Preferred path: the header was parsed by the framework.
        if (headers.Location != null)
        {
            return headers.Location.IsAbsoluteUri
                ? headers.Location
                : new Uri(currentUri, headers.Location);
        }

        // Fallback: work with the raw header text.
        if (!headers.TryGetValues("Location", out var values))
        {
            return null;
        }

        var rawLocation = values.FirstOrDefault();
        if (string.IsNullOrWhiteSpace(rawLocation))
        {
            return null;
        }

        if (Uri.TryCreate(rawLocation, UriKind.Absolute, out var absoluteUri))
        {
            return absoluteUri;
        }

        if (Uri.TryCreate(currentUri, rawLocation, out var relativeUri))
        {
            return relativeUri;
        }

        // Last resort: escape the raw value and retry both interpretations.
        // NOTE(review): Uri.EscapeUriString is marked obsolete (SYSLIB0013) on .NET 6+;
        // it is kept because there is no drop-in replacement with identical escaping.
        var escaped = Uri.EscapeUriString(rawLocation);
        if (Uri.TryCreate(escaped, UriKind.Absolute, out absoluteUri))
        {
            return absoluteUri;
        }

        return Uri.TryCreate(currentUri, escaped, out relativeUri)
            ? relativeUri
            : null;
    }

    /// <summary>Disposes the underlying <see cref="HttpClient"/>.</summary>
    public void Dispose()
    {
        _httpClient.Dispose();
    }
}
|