XRootD
Loading...
Searching...
No Matches
XrdClS3Factory.cc
Go to the documentation of this file.
1/******************************************************************************/
2/* Copyright (C) 2025, Pelican Project, Morgridge Institute for Research */
3/* */
4/* This file is part of the XrdClS3 client plugin for XRootD. */
5/* */
6/* XRootD is free software: you can redistribute it and/or modify it under */
7/* the terms of the GNU Lesser General Public License as published by the */
8/* Free Software Foundation, either version 3 of the License, or (at your */
9/* option) any later version. */
10/* */
11/* XRootD is distributed in the hope that it will be useful, but WITHOUT */
12/* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or */
13/* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public */
14/* License for more details. */
15/* */
16/* The copyright holder's institutional names and contributor's names may not */
17/* be used to endorse or promote products derived from this software without */
18/* specific prior written permission of the institution or contributor. */
19/******************************************************************************/
20
21#include "XrdClS3Factory.hh"
22#include "XrdClS3File.hh"
23#include "XrdClS3Filesystem.hh"
24
25#include <openssl/evp.h>
26#include <openssl/hmac.h>
28#include <XrdCl/XrdClLog.hh>
29
30#include <fcntl.h>
31
33
34using namespace XrdClS3;
35
36std::shared_mutex Factory::m_bucket_auth_map_mutex;
37bool Factory::m_initialized = false;
38XrdCl::Log *Factory::m_log{nullptr};
39std::once_flag Factory::m_init_once;
40std::string Factory::m_endpoint = "";
41std::string Factory::m_service = "s3";
42std::string Factory::m_region = "us-east-1";
43std::string Factory::m_url_style = "virtual";
44std::string Factory::m_mkdir_sentinel;
45Factory::Credentials Factory::m_default_creds;
46std::unordered_map<std::string, Factory::Credentials> Factory::m_bucket_location_map;
47std::unordered_map<std::string, std::pair<Factory::Credentials, std::chrono::steady_clock::time_point>> Factory::m_bucket_auth_map;
48
49
50namespace {
51
// Percent-encode `input` following the AWS query API rules:
// the unreserved characters A-Z, a-z, 0-9, '-', '_', '.' and '~' are copied
// verbatim; every other byte becomes "%XY" with uppercase hex digits.
// Multi-byte UTF-8 sequences are therefore encoded one byte at a time.
//
// See
// http://docs.amazonwebservices.com/AWSEC2/2010-11-15/DeveloperGuide/using-query-api.html
std::string
AmazonURLEncode(const std::string &input) {
    static const char kUpperHex[] = "0123456789ABCDEF";

    std::string encoded;
    encoded.reserve(input.size());
    for (const char ch : input) {
        const bool is_alpha = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
        const bool is_digit = (ch >= '0' && ch <= '9');
        const bool is_mark = (ch == '-' || ch == '_' || ch == '.' || ch == '~');
        if (is_alpha || is_digit || is_mark) {
            encoded.push_back(ch);
            continue;
        }
        // Work on the unsigned byte so high-bit characters map to 80..FF.
        const auto byte = static_cast<unsigned char>(ch);
        encoded.push_back('%');
        encoded.push_back(kUpperHex[byte >> 4]);
        encoded.push_back(kUpperHex[byte & 0x0F]);
    }
    return encoded;
}
80
81}
82
84 std::call_once(m_init_once, [&] {
86 if (!m_log) {
87 return;
88 }
89 m_log->SetTopicName(kLogXrdClS3, "XrdClS3");
90
91 auto env = XrdCl::DefaultEnv::GetEnv();
92 if (!env) {
93 return;
94 }
95 InitS3Config();
96 m_initialized = true;
97 });
98}
99
100std::string
101Factory::CanonicalizeQueryString(const std::string &url) {
102 auto loc = url.find("://");
103 if (loc == std::string::npos) {
104 return "";
105 }
106 loc += 3; // Skip the "://"
107 loc = url.find('?', loc);
108 if (loc == std::string::npos) {
109 return "";
110 }
111 std::vector<std::pair<std::string, std::string>> query_parameters;
112 auto param_end = url.find('&', loc);
113 while (loc != std::string::npos) {
114 auto param_start = loc + 1; // Skip the '?' / '&'
115 loc = url.find('=', param_start);
116 if (loc == param_start) {
117 // Empty parameter name, skip
118 }
119 else if (loc >= param_end) {
120 auto param = url.substr(param_start, param_end - param_start);
121 if (!param.empty()) {
122 // No '=' found, treat as a parameter without value
123 query_parameters.emplace_back(AmazonURLEncode(param), "");
124 }
125 } else {
126 std::string name = url.substr(param_start, loc - param_start);
127 loc++; // Move past '='
128 auto value_start = loc;
129 std::string value;
130 if (param_end == std::string::npos) {
131 value = url.substr(value_start);
132 } else {
133 value = url.substr(value_start, param_end - value_start);
134 }
135 if (!value.empty()) {
136 query_parameters.emplace_back(AmazonURLEncode(name), AmazonURLEncode(value));
137 }
138 }
139 loc = param_end;
140 if (loc != std::string::npos) {
141 param_end = url.find('&', loc + 1);
142 }
143 }
144 std::sort(query_parameters.begin(), query_parameters.end(),
145 [](const auto &a, const auto &b) { return a.first < b.first; });
146
147 size_t string_size = 0;
148 for (const auto &param : query_parameters) {
149 string_size += param.first.size() + param.second.size() + 2;
150 }
151 std::string canonicalQueryString;
152 if (string_size) {
153 canonicalQueryString.reserve(string_size);
154 }
155 for (const auto &param : query_parameters) {
156
157 // Step 1C: Separate parameter names from values with '='.
158 canonicalQueryString += param.first + '=' + param.second;
159
160 // Step 1D: Separate name-value pairs with '&';
161 canonicalQueryString += '&';
162 }
163 // We'll always have a superflous trailing ampersand.
164 if (!canonicalQueryString.empty()) {
165 canonicalQueryString.erase(canonicalQueryString.end() - 1);
166 }
167 return canonicalQueryString;
168}
169
171Factory::CreateFile(const std::string & /*url*/) {
172 if (!m_initialized) {return nullptr;}
173 return new File(m_log);
174}
175
177Factory::CreateFileSystem(const std::string & url) {
178 if (!m_initialized) {return nullptr;}
179 return new Filesystem(url, m_log);
180}
181
182namespace {
183
184void SetDefault(XrdCl::Env *env, const std::string &optName, const std::string &envName, std::string &location, const std::string &def) {
185 std::string val;
186 if (!env->GetString(optName, val) || val.empty()) {
187 env->PutString(optName, "");
188 env->ImportString(optName, envName);
189 }
190 if (env->GetString(optName, val) && !val.empty()) {
191 location = val;
192 } else {
193 location = def;
194 }
195}
196
// Strip leading whitespace (as classified by isspace) from a string_view.
// Returns the suffix starting at the first non-space character, or an empty
// view when the input is empty or consists only of whitespace.
std::string_view ltrim_view(const std::string_view input_view) {
    for (size_t idx = 0; idx < input_view.size(); idx++) {
        // isspace() requires a value representable as unsigned char; a plain
        // char holding a byte >= 0x80 may be negative.
        if (!isspace(static_cast<unsigned char>(input_view[idx]))) {
            return input_view.substr(idx);
        }
    }
    return "";
}
206
207bool ComputeSHA256(const std::string_view payload, std::vector<unsigned char> &messageDigest) {
208 EVP_MD_CTX *mdctx = EVP_MD_CTX_create();
209 if (mdctx == NULL) {
210 return false;
211 }
212
213 if (!EVP_DigestInit_ex(mdctx, EVP_sha256(), NULL)) {
214 EVP_MD_CTX_destroy(mdctx);
215 return false;
216 }
217
218 if (!EVP_DigestUpdate(mdctx, payload.data(), payload.length())) {
219 EVP_MD_CTX_destroy(mdctx);
220 return false;
221 }
222
223 unsigned int mdLength;
224 if (!EVP_DigestFinal_ex(mdctx, messageDigest.data(), &mdLength)) {
225 EVP_MD_CTX_destroy(mdctx);
226 return false;
227 }
228 messageDigest.resize(mdLength);
229
230 EVP_MD_CTX_destroy(mdctx);
231 return true;
232}
233
// Render `messageDigest` as lowercase hexadecimal, two characters per byte.
// Any previous contents of `hexEncoded` are replaced.
void MessageDigestAsHex(const std::vector<unsigned char> &messageDigest,
                        std::string &hexEncoded) {
    static const char kLowerHex[] = "0123456789abcdef";

    hexEncoded.clear();
    hexEncoded.reserve(messageDigest.size() * 2);
    for (const unsigned char byte : messageDigest) {
        hexEncoded.push_back(kLowerHex[byte >> 4]);
        hexEncoded.push_back(kLowerHex[byte & 0x0F]);
    }
}
242
// Keep reading from `fd` into `ptr` until `nbytes` bytes have arrived or the
// descriptor reports end-of-file.  A read interrupted by a signal (EINTR) is
// simply retried.
//
// Returns the number of bytes actually stored (less than `nbytes` only at
// EOF), or -1 if read() fails with any error other than EINTR.
ssize_t FullRead(int fd, void *ptr, size_t nbytes) {
    char *cursor = static_cast<char *>(ptr);
    ssize_t remaining = nbytes;

    while (remaining > 0) {
        const ssize_t got = read(fd, cursor, remaining);
        if (got < 0) {
            if (errno == EINTR) {
                continue; // interrupted before any data arrived; try again
            }
            return -1;
        }
        if (got == 0) {
            break; // EOF
        }
        remaining -= got;
        cursor += got;
    }
    return (nbytes - remaining);
}
266
267// Read a file into a string.
268// If the file is larger than 32k, it will return false.
269bool
270ReadShortFile(const std::string &fileName, std::string &contents, std::string &err_msg) {
271 int fd = open(fileName.c_str(), O_RDONLY, 0600);
272 if (fd < 0) {
273 err_msg = "Failed to open file '" + fileName + "': " + std::string(strerror(errno));
274 return false;
275 }
276 contents.resize(32*1024);
277
278 auto totalRead = FullRead(fd, contents.data(), contents.size());
279 close(fd);
280 if (totalRead == -1) {
281 err_msg = "Failed to read file '" + fileName + "': " + std::string(strerror(errno));
282 return false;
283 }
284 contents.resize(totalRead);
285 return true;
286}
287
288} // namespace
289
290std::string
291Factory::CleanObjectName(const std::string & input_obj) {
292 std::string obj = input_obj;
293 auto loc = input_obj.find('?');
294 if (loc != std::string::npos) {
295 auto query = std::string_view(input_obj).substr(loc + 1);
296 obj = obj.substr(0, loc);
297 bool added_query = false;
298 while (!query.empty()) {
299 auto next_query_loc = query.find('&');
300 auto current_query = (next_query_loc == std::string::npos) ? query : query.substr(0, next_query_loc);
301 query = (next_query_loc == std::string::npos) ? "" : query.substr(next_query_loc + 1);
302 if (current_query.empty()) {
303 continue;
304 }
305 auto equal_loc = current_query.find('=');
306 if (equal_loc != std::string::npos) {
307 auto key = current_query.substr(0, equal_loc);
308 if (key != "authz") {
309 obj += (added_query ? "&" : "?") + std::string(current_query);
310 added_query = true;
311 }
312 } else if (current_query != "authz") {
313 obj += (added_query ? "&" : "?") + std::string(current_query);
314 added_query = true;
315 }
316 }
317 } else {
318 obj = input_obj;
319 }
320 return obj;
321}
322
323std::string_view
324Factory::ExtractHostname(const std::string_view url) {
325 auto loc = url.find("://");
326 if (loc == std::string_view::npos) {
327 return {};
328 }
329 loc += 3; // Move past "://"
330 auto slash_loc = url.find('/', loc);
331 auto query_loc = url.find('?', loc);
332 if (query_loc != std::string_view::npos && (slash_loc == std::string_view::npos || query_loc < slash_loc)) {
333 slash_loc = query_loc; // If there's a query, we stop at it
334 }
335 auto authority = url.substr(loc, slash_loc - loc);
336 if (authority.empty()) {
337 return {};
338 }
339 auto at_loc = authority.find('@');
340 if (at_loc != std::string_view::npos) {
341 // If there's an '@', we have user info, so we skip it
342 authority = authority.substr(at_loc + 1);
343 }
344 // If the authority contains a port, we need to strip it
345 auto colon_loc = authority.find(':');
346 if (colon_loc != std::string_view::npos) {
347 authority = authority.substr(0, colon_loc);
348 }
349 return authority;
350}
351
352void
353Factory::InitS3Config()
354{
355 auto env = XrdCl::DefaultEnv::GetEnv();
356 SetDefault(env, "XrdClS3MkdirSentinel", "XRDCLS3_MKDIRSENTINEL", m_mkdir_sentinel, ".xrdcls3.dirsentinel");
357 SetDefault(env, "XrdClS3Endpoint", "XRDCLS3_ENDPOINT", m_endpoint, "");
358 SetDefault(env, "XrdClS3UrlStyle", "XRDCLS3_URLSTYLE", m_url_style, "virtual");
359 SetDefault(env, "XrdClS3Region", "XRDCLS3_REGION", m_region, "us-east-1");
360 std::string access_key;
361 SetDefault(env, "XrdClS3AccessKeyLocation", "XRDCLS3_ACCESSKEYLOCATION", access_key, "");
362 std::string secret_key;
363 SetDefault(env, "XrdClS3SecretKeyLocation", "XRDCLS3_SECRETKEYLOCATION", secret_key, "");
364 if (!access_key.empty() && !secret_key.empty()) {
365 m_default_creds = {access_key, secret_key};
366 } else if (access_key.empty() && secret_key.empty()) {
367 m_log->Info(kLogXrdClS3, "Defaulting to public bucket access");
368 } else if (access_key.empty() && !secret_key.empty()) {
369 m_log->Warning(kLogXrdClS3, "Secret key location set (%s) but access key location is empty; authorization will not work.", secret_key.c_str());
370 } else if (!access_key.empty() && secret_key.empty()) {
371 m_log->Warning(kLogXrdClS3, "Access key location set (%s) but secret key location is empty; authorization will not work.", access_key.c_str());
372 }
373
374 // Parse the per-bucket configuration of credentials.
375 std::string bucket_configs;
376 SetDefault(env, "XrdClS3BucketConfigs", "XRDCLS3_BUCKETCONFIGS", bucket_configs, "");
377 if (!bucket_configs.empty()) {
378 std::stringstream ss(bucket_configs);
379 std::string config_name;
380 while (std::getline(ss, config_name)) {
381 auto name = TrimView(config_name);
382 auto bucket_name_key = std::string("XrdClS3") + std::string(name) + "BucketName";
383 std::string bucket_name_val;
384 if (!env->GetString(bucket_name_key, bucket_name_val) || bucket_name_val.empty()) {
385 m_log->Warning(kLogXrdClS3, "Per-bucket config includes entry '%s' but XrdClS3%sBucketName is not set", std::string(name).c_str(), std::string(name).c_str());
386 continue;
387 }
388 auto access_key_location_key = std::string("XrdClS3") + std::string(name) + "AccessKeyLocation";
389 std::string access_key_location_val;
390 auto has_access_key = env->GetString(access_key_location_key, access_key_location_val) && !access_key_location_val.empty();
391
392 auto secret_key_location_key = std::string("XrdClS3") + std::string(name) + "SecretKeyLocation";
393 std::string secret_key_location_val;
394 auto has_secret_key = env->GetString(secret_key_location_key, secret_key_location_val) && !secret_key_location_val.empty();
395
396 if (has_access_key && has_secret_key) {
397 m_bucket_location_map[bucket_name_val] = {access_key_location_val, secret_key_location_val};
398 } else if (!has_access_key && !has_secret_key) {
399 // If both are empty, then it is implicitly a public bucket.
400 m_bucket_location_map[bucket_name_val] = {"", ""};
401 } else if (has_access_key && !has_secret_key) {
402 m_log->Warning(kLogXrdClS3, "Per-bucket config for entry '%s' has an access key location set (%s) but no secret key", std::string(name).c_str(), access_key_location_val.c_str());
403 } else {
404 m_log->Warning(kLogXrdClS3, "Per-bucket config for entry '%s' has an secret key location set (%s) but no access key", std::string(name).c_str(), secret_key_location_val.c_str());
405 }
406 }
407 }
408}
409
410bool
411Factory::GenerateHttpUrl(const std::string &s3_url, std::string &https_url, std::string *obj_result, std::string &err_msg) {
412 if (s3_url.substr(0, 5) != "s3://") {
413 err_msg = "Provided URL does not start with s3://";
414 return false;
415 }
416 auto loc = s3_url.find('/', 5);
417 auto bucket = s3_url.substr(5, loc - 5);
418 auto at_loc = bucket.find('@');
419 if (at_loc != std::string::npos) {
420 std::string login = "";
421 login = bucket.substr(0, at_loc);
422 bucket = bucket.substr(at_loc + 1);
423 }
424 std::string endpoint = m_endpoint;
425 std::string region = m_region;
426 if ((bucket == m_endpoint) || m_endpoint.empty()) {
427 endpoint = bucket;
428 auto old_loc = loc + 1;
429 loc = s3_url.find('/', loc + 1);
430 if (loc == std::string::npos) {
431 err_msg = "Provided S3 URL does not contain a bucket in path";
432 return false;
433 }
434 bucket = s3_url.substr(old_loc, loc - old_loc);
435 } else {
436 auto authority = ExtractHostname(s3_url);
437 std::string test_endpoint = "." + endpoint;
438 if (!m_region.empty()) {
439 auto bucket_loc = authority.rfind("." + m_region + test_endpoint);
440 if (bucket_loc != std::string::npos) {
441 bucket = authority.substr(0, bucket_loc);
442 } else {
443 auto bucket_loc = authority.rfind(test_endpoint);
444 if (bucket_loc != std::string::npos) {
445 bucket = authority.substr(0, bucket_loc);
446 }
447 }
448 } else {
449 auto bucket_loc = authority.rfind(test_endpoint);
450 if (bucket_loc != std::string::npos) {
451 bucket = authority.substr(0, bucket_loc);
452 }
453 }
454 }
455 std::string obj;
456 if (loc != std::string::npos) {
457 obj = s3_url.substr(loc + 1);
458 }
459 // Strip out "authz" query parameters; those are internal to XRootD.
460 obj = CleanObjectName(obj);
461 if (obj_result) {
462 *obj_result = obj;
463 }
464 if (m_url_style == "virtual" || m_url_style.empty()) {
465 https_url = "https://" + bucket + "." + m_region + "." + endpoint + (obj_result ? "" : ("/" + obj));
466 return true;
467 } else if (m_url_style == "path") {
468 if (m_region.empty()) {
469 https_url = "https://" + m_region + "." + endpoint + "/" + bucket + (obj_result ? "" : ("/" + obj));
470 } else {
471 https_url = "https://" + endpoint + "/" + bucket + (obj_result ? "" : ("/" + obj));
472 }
473 return true;
474 } else {
475 err_msg = "Server configuration has invalid setting for URL style";
476 return false;
477 }
478}
479
480bool
481Factory::GenerateV4Signature(const std::string &url, const std::string &verb, std::vector<std::pair<std::string, std::string>> &headers, std::string &auth_token, std::string &err_msg) {
482 auto bucket = GetBucketFromHttpsUrl(url);
483
484 // If we're using temporary credentials, we need to add the token
485 // header here as well. We set saKey and keyID here (well before
486 // necessary) since we'll get them for free when we get the token.
487 auto [keyId, secretKey, ok] = GetCredentialsForBucket(bucket, err_msg);
488 if (!ok) {
489 return false;
490 }
491
492 if (secretKey.empty()) {
493 auth_token = "";
494 return true;
495 }
496
497 //
498 // Create task 1's inputs.
499 //
500
501 auto canonicalURI = PathEncode(url);
502
503 // The canonical query string is the alphabetically sorted list of
504 // URI-encoded parameter names '=' values, separated by '&'s.
505 auto canonicalQueryString = CanonicalizeQueryString(url);
506
507 // The canonical headers must include the Host header, so add that
508 // now if we don't have it.
509 if (std::find_if(headers.begin(), headers.end(),
510 [](const auto &pair) { return pair.first == "Host"; }) == headers.end()) {
511 auto host = ExtractHostname(url);
512 if (host.empty()) {
513 err_msg = "Unable to extract hostname from URL: " + url;
514 return false;
515 }
516 headers.emplace_back("Host", host);
517 }
518
519 // S3 complains if x-amz-date isn't signed, so do this early.
520 auto iter = std::find_if(headers.begin(), headers.end(),
521 [](const auto &pair) { return !strcasecmp(pair.first.c_str(), "X-Amz-Date"); });
522 std::string date_time;
523 char date_char[] = "YYYYMMDD";
524 if (iter == headers.end()) {
525 time_t now;
526 time(&now);
527 struct tm brokenDownTime;
528 gmtime_r(&now, &brokenDownTime);
529
530 date_time = "YYYYMMDDThhmmssZ";
531 strftime(date_time.data(), date_time.size(), "%Y%m%dT%H%M%SZ", &brokenDownTime);
532 headers.emplace_back("X-Amz-Date", date_time);
533 strftime(date_char, sizeof(date_char), "%Y%m%d", &brokenDownTime);
534 } else {
535 date_time = iter->second;
536 auto loc = date_time.find('T', 0);
537 if (loc != 8) {
538 err_msg = "Invalid value for X-Amz-Date";
539 return false;
540 }
541 memcpy(date_char, date_time.c_str(), 8);
542 }
543
544 // The canonical payload hash is the lowercase hexadecimal string of the
545 // (SHA256) hash value of the payload or "UNSIGNED-PAYLOAD" if
546 // we are not signing the payload.
547 std::string payload_hash = "UNSIGNED-PAYLOAD";
548 iter = std::find_if(headers.begin(), headers.end(),
549 [](const auto &pair) { return !strcasecmp(pair.first.c_str(), "X-Amz-Content-Sha256"); });
550 if (iter == headers.end()) {
551 headers.emplace_back("X-Amz-Content-Sha256", payload_hash);
552 } else {
553 payload_hash = iter->second;
554 }
555
556 // The canonical list of headers is a sorted list of lowercase header
557 // names paired via ':' with the trimmed header value, each pair
558 // terminated with a newline.
559 std::vector<std::pair<std::string, std::string>> transformed_headers;
560 transformed_headers.reserve(headers.size());
561 for (const auto &info : headers) {
562 std::string header = info.first;
563 std::transform(header.begin(), header.end(), header.begin(), &tolower);
564
565 std::string value = info.second;
566 if (value.empty()) {
567 continue;
568 }
569 auto value_trimmed = std::string(TrimView(value));
570
571 // Convert internal runs of spaces into single spaces.
572 unsigned left = 1;
573 unsigned right = 1;
574 bool inSpaces = false;
575 while (right < value_trimmed.length()) {
576 if (!inSpaces) {
577 if (value_trimmed[right] == ' ') {
578 inSpaces = true;
579 left = right;
580 ++right;
581 } else {
582 ++right;
583 }
584 } else {
585 if (value_trimmed[right] == ' ') {
586 ++right;
587 } else {
588 inSpaces = false;
589 value_trimmed.erase(left, right - left - 1);
590 right = left + 1;
591 }
592 }
593 }
594
595 transformed_headers.emplace_back(header, value);
596 }
597 std::sort(transformed_headers.begin(), transformed_headers.end(),
598 [](const auto &a, const auto &b) { return a.first < b.first; });
599
600 // The canonical list of signed headers is trivial to generate while
601 // generating the list of headers.
602 std::string signedHeaders, canonicalHeaders;
603 for (const auto &info : transformed_headers) {
604 canonicalHeaders += info.first + ":" + info.second + "\n";
605 signedHeaders += info.first + ";";
606 }
607 signedHeaders.erase(signedHeaders.end() - 1);
608
609 // Task 1: create the canonical request.
610 auto canonicalRequest =
611 verb + "\n" + canonicalURI + "\n" + canonicalQueryString + "\n" +
612 canonicalHeaders + "\n" + signedHeaders + "\n" + payload_hash;
613
614 //
615 // Create task 2's inputs.
616 //
617
618 // Hash the canonical request the way we did the payload.
619 std::string canonicalRequestHash;
620 std::vector<unsigned char> messageDigest;
621 messageDigest.resize(EVP_MAX_MD_SIZE);
622 if (!ComputeSHA256(canonicalRequest, messageDigest)) {
623 err_msg = "Unable to hash canonical request.";
624 return false;
625 }
626 MessageDigestAsHex(messageDigest, canonicalRequestHash);
627
628 // Task 2: create the string to sign.
629 auto credentialScope = std::string(date_char) + "/" + m_region + "/" + m_service + "/aws4_request";
630 auto stringToSign = std::string("AWS4-HMAC-SHA256\n") + date_time + "\n" + credentialScope + "\n" + canonicalRequestHash;
631
632 //
633 // Creating task 3's inputs was done when we checked to see if we needed
634 // to get the security token, since they come along for free when we do.
635 //
636
637 // Task 3: calculate the signature.
638 auto saKey = std::string("AWS4") + secretKey;
639 unsigned int mdLength = 0;
640 const unsigned char *hmac =
641 HMAC(EVP_sha256(), saKey.c_str(), saKey.length(), (unsigned char *)date_char,
642 sizeof(date_char) - 1, messageDigest.data(), &mdLength);
643 if (hmac == NULL) {
644 err_msg = "Unable to calculate HMAC for date.";
645 return false;
646 }
647
648 unsigned int md2Length = 0;
649 unsigned char messageDigest2[EVP_MAX_MD_SIZE];
650 hmac = HMAC(EVP_sha256(), messageDigest.data(), mdLength,
651 reinterpret_cast<unsigned char *>(m_region.data()), m_region.size(), messageDigest2,
652 &md2Length);
653 if (hmac == NULL) {
654 err_msg = "Unable to calculate HMAC for region.";
655 return false;
656 }
657
658 hmac = HMAC(EVP_sha256(), messageDigest2, md2Length,
659 reinterpret_cast<unsigned char *>(m_service.data()), m_service.size(), messageDigest.data(),
660 &mdLength);
661 if (hmac == NULL) {
662 err_msg = "Unable to calculate HMAC for service.";
663 return false;
664 }
665
666 const char request_char[] = "aws4_request";
667 hmac = HMAC(EVP_sha256(), messageDigest.data(), messageDigest.size(), reinterpret_cast<const unsigned char *>(request_char),
668 sizeof(request_char) - 1, messageDigest2, &md2Length);
669 if (hmac == NULL) {
670 err_msg = "Unable to calculate HMAC for request.";
671 return false;
672 }
673
674 hmac = HMAC(EVP_sha256(), messageDigest2, md2Length,
675 reinterpret_cast<unsigned char *>(stringToSign.data()),
676 stringToSign.size(), messageDigest.data(), &mdLength);
677 if (hmac == NULL) {
678 err_msg = "Unable to calculate HMAC for request string.";
679 return false;
680 }
681
682 std::string signature;
683 MessageDigestAsHex(messageDigest, signature);
684
685 auth_token =
686 std::string("AWS4-HMAC-SHA256 Credential=") + keyId + "/" + credentialScope +
687 ",SignedHeaders=" + signedHeaders + ",Signature=" + signature;
688 return true;
689}
690
691std::string
692Factory::GetBucketFromHttpsUrl(const std::string &url) {
693 if (m_url_style == "virtual" || m_url_style.empty()) {
694 // Virtual-hosted-style URLs are of the form https://bucket.region.endpoint/object
695 auto hostname = ExtractHostname(url);
696 if (hostname.empty()) {
697 return {};
698 }
699 auto test_endpoint = "." + m_endpoint;
700 if (!m_region.empty()) test_endpoint = "." + m_region + test_endpoint;
701 auto loc = hostname.rfind(test_endpoint);
702 if (loc == std::string::npos) {
703 if (!m_region.empty()) {
704 loc = hostname.rfind("." + m_endpoint);
705 if (loc != std::string::npos) {
706 return std::string(hostname.substr(0, loc));
707 }
708 }
709 return {};
710 }
711 return std::string(hostname.substr(0, loc));
712 } else if (m_url_style == "path") {
713 // Path style URLs are of the form https://region.endpoint/bucket/object
714 auto loc = url.find("://");
715 if (loc == std::string::npos) {
716 return {};
717 }
718 loc += 3; // Move past "://"
719 auto slash_loc = url.find('/', loc);
720 if (slash_loc == std::string::npos) {
721 return {};
722 }
723 auto bucket_start = slash_loc + 1;
724 auto bucket_end = url.find('/', bucket_start);
725 if (bucket_end == std::string::npos) {
726 return url.substr(bucket_start);
727 }
728 return url.substr(bucket_start, bucket_end - bucket_start);
729 } else {
730 // Invalid URL style
731 return {};
732 }
733}
734
735std::tuple<std::string, std::string, bool>
736Factory::GetCredentialsForBucket(const std::string &bucket, std::string &err_msg)
737{
738 auto now = std::chrono::steady_clock::now();
739 {
740 std::shared_lock lock(m_bucket_auth_map_mutex);
741 auto iter = m_bucket_auth_map.find(bucket);
742 if (iter != m_bucket_auth_map.end()) {
743 // If we have credentials for this bucket, check if they are still valid.
744 auto &creds = iter->second.first;
745 auto &expiration = iter->second.second;
746 if (now < expiration) {
747 // Credentials are still valid, return them.
748 return {creds.m_accesskey, creds.m_secretkey, true};
749 }
750 }
751 }
752
753 std::unique_lock lock(m_bucket_auth_map_mutex);
754 auto iter = m_bucket_location_map.find(bucket);
755 std::string access_key_location, secret_key_location;
756 if (iter == m_bucket_location_map.end()) {
757 // If we don't have credentials for this bucket, use the default.
758 if (m_default_creds.m_accesskey.empty() || m_default_creds.m_secretkey.empty()) {
759 // No credentials at all, so we assume public access.
760 m_bucket_auth_map[bucket] = {{"", ""}, now + std::chrono::minutes(1)};
761 return {"", "", true};
762 }
763 access_key_location = m_default_creds.m_accesskey;
764 secret_key_location = m_default_creds.m_secretkey;
765 } else {
766 access_key_location = iter->second.m_accesskey;
767 secret_key_location = iter->second.m_secretkey;
768 }
769 if (access_key_location.empty() && secret_key_location.empty()) {
770 // If both are empty, we assume public access.
771 m_bucket_auth_map[bucket] = {{"", ""}, now + std::chrono::minutes(1)};
772 return {"", "", true};
773 }
774 if (access_key_location.empty() || secret_key_location.empty()) {
775 err_msg = "No credentials available for bucket: " + bucket;
776 m_bucket_auth_map[bucket] = {{"", ""}, now + std::chrono::seconds(10)};
777 return {"", "", false};
778 }
779
780 std::string access_key, secret_key;
781 if (!ReadShortFile(access_key_location, access_key, err_msg)) {
782 m_bucket_auth_map[bucket] = {{"", ""}, now + std::chrono::seconds(10)};
783 return {"", "", false};
784 }
785 access_key = TrimView(access_key);
786
787 if (!ReadShortFile(secret_key_location, secret_key, err_msg)) {
788 m_bucket_auth_map[bucket] = {{"", ""}, now + std::chrono::seconds(10)};
789 return {"", "", false};
790 }
791 secret_key = TrimView(secret_key);
792
793 if (access_key.empty() || secret_key.empty()) {
794 err_msg = "Credentials for bucket '" + bucket + "' are empty.";
795 m_bucket_auth_map[bucket] = {{"", ""}, now + std::chrono::seconds(10)};
796 return {"", "", false};
797 }
798 m_bucket_auth_map[bucket] = {{access_key, secret_key}, now + std::chrono::minutes(1)};
799 return {access_key, secret_key, true};
800}
801
802std::string
803Factory::PathEncode(const std::string_view url) {
804 auto loc = url.find("://");
805 if (loc == std::string_view::npos) {
806 return "";
807 }
808 auto path_loc = url.find("/", loc + 3);
809 auto query_loc = url.find("?", loc + 3);
810 if (query_loc != std::string_view::npos && (path_loc == std::string_view::npos || query_loc < path_loc)) {
811 // No path, just a query string
812 return "/";
813 }
814 auto path = url.substr(path_loc, query_loc - path_loc);
815 std::string segment;
816 std::string encoded;
817
818 size_t next = 0;
819 size_t offset = 0;
820 const auto length = path.size();
821 while (offset < length) {
822 next = strcspn(path.data() + offset, "/");
823 if (next == 0) {
824 encoded += "/";
825 offset += 1;
826 continue;
827 }
828 if (offset + next >= length) {
829 next = length - offset;
830 }
831
832 segment = std::string(path.data() + offset, next);
833 encoded += AmazonURLEncode(segment);
834
835 offset += next;
836 }
837 return encoded;
838}
839
840// Trim left and right side of a string_view for space characters
841std::string_view
842Factory::TrimView(const std::string_view input_view) {
843 auto view = ltrim_view(input_view);
844 for (size_t idx = 0; idx < input_view.size(); idx++) {
845 if (!isspace(view[view.size() - 1 - idx])) {
846 return view.substr(0, view.size() - idx);
847 }
848 }
849 return "";
850}
851
852extern "C"
853{
854 void *XrdClGetPlugIn(const void*)
855 {
856 return static_cast<void*>(new Factory());
857 }
858}
void * XrdClGetPlugIn(const void *)
void * XrdClGetPlugIn(const void *)
XrdVERSIONINFO(XrdClGetPlugIn, XrdClGetPlugIn) using namespace XrdClS3
#define close(a)
Definition XrdPosix.hh:48
#define open
Definition XrdPosix.hh:76
#define read(a, b, c)
Definition XrdPosix.hh:82
XrdOucString File
virtual XrdCl::FilePlugIn * CreateFile(const std::string &url) override
Create a file plug-in for the given URL.
static std::string_view ExtractHostname(const std::string_view url)
static std::string PathEncode(const std::string_view url)
static std::string CleanObjectName(const std::string &object)
static bool GenerateHttpUrl(const std::string &s3_url, std::string &https_url, std::string *obj_result, std::string &err_msg)
virtual XrdCl::FileSystemPlugIn * CreateFileSystem(const std::string &url) override
Create a file system plug-in for the given URL.
static std::tuple< std::string, std::string, bool > GetCredentialsForBucket(const std::string &bucket, std::string &err_msg)
static bool GenerateV4Signature(const std::string &url, const std::string &verb, std::vector< std::pair< std::string, std::string > > &headers, std::string &auth_token, std::string &err_msg)
static std::string_view TrimView(const std::string_view str)
static std::string GetBucketFromHttpsUrl(const std::string &url)
static Log * GetLog()
Get default log.
static Env * GetEnv()
Get default client environment.
bool PutString(const std::string &key, const std::string &value)
Definition XrdClEnv.cc:52
bool ImportString(const std::string &key, const std::string &shellKey)
Definition XrdClEnv.cc:214
bool GetString(const std::string &key, std::string &value)
Definition XrdClEnv.cc:31
An interface for file plug-ins.
An interface for file plug-ins.
Handle diagnostics.
Definition XrdClLog.hh:101
std::string_view ltrim_view(const std::string_view &input_view)
const uint64_t kLogXrdClS3