-
Notifications
You must be signed in to change notification settings - Fork 17
Expand file tree
/
Copy pathpg_manager.hpp
More file actions
268 lines (235 loc) · 11.7 KB
/
pg_manager.hpp
File metadata and controls
268 lines (235 loc) · 11.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
#pragma once
#include <atomic>
#include <compare>
#include <cstdint>
#include <set>
#include <string>
#include <vector>

#include <boost/uuid/uuid_io.hpp>
#include <sisl/utility/enum.hpp>
#include <sisl/logging/logging.h>

#include "common.hpp"
namespace homeobject {
// Error codes for placement-group (PG) operations; PGManager is declared as Manager< PGError >.
// The underlying type is uint16_t and the first enumerator (UNKNOWN) is explicitly 1.
// NOTE(review): the remaining enumerators carry no explicit value; presumably the sisl ENUM macro numbers
// them sequentially like a plain enum -- confirm before relying on the numeric values.
ENUM(PGError, uint16_t, UNKNOWN = 1, INVALID_ARG, TIMEOUT, UNKNOWN_PG, NOT_LEADER, UNKNOWN_PEER, UNSUPPORTED_OP,
CRC_MISMATCH, NO_SPACE_LEFT, DRIVE_WRITE_ERROR, RETRY_REQUEST, SHUTTING_DOWN, ROLL_BACK, CANCELLED,
QUORUM_NOT_MET);
// Status of a replace-member task, carried in PGReplaceMemberStatus::status (which defaults to UNKNOWN).
// The underlying type is uint16_t and the first enumerator (COMPLETED) is explicitly 0.
ENUM(PGReplaceMemberTaskStatus, uint16_t, COMPLETED = 0, IN_PROGRESS, NOT_LEADER, TASK_ID_MISMATCH, TASK_NOT_FOUND,
UNKNOWN);
// Bit masks describing the state of a PG; pg_state ORs/ANDs these values into its 64-bit atomic word.
// Every non-zero enumerator is a distinct single bit. HEALTHY is 0, i.e. it has no bit of its own.
// Reference definition:
// https://github.corp.ebay.com/SDS/nuobject_proto/blob/main/src/proto/pg.proto#L52
ENUM(PGStateMask, uint32_t, HEALTHY = 0, DISK_DOWN = 0x1, SCRUBBING = 0x2, BASELINE_RESYNC = 0x4, INCONSISTENT = 0x8,
REPAIR = 0x10, GC_IN_PROGRESS = 0x20, RESYNCING = 0x40);
struct PGMember {
// Max length is based on homestore::replica_member_info::max_name_len - 1. Last byte is null terminated.
static constexpr uint64_t max_name_len = 127;
explicit PGMember(peer_id_t _id) : id(_id) {}
PGMember(peer_id_t _id, std::string const& _name) : id(_id), name(_name) {
RELEASE_ASSERT(name.size() <= max_name_len, "Name exceeds max length");
}
PGMember(peer_id_t _id, std::string const& _name, int32_t _priority) : id(_id), name(_name), priority(_priority) {
RELEASE_ASSERT(name.size() <= max_name_len, "Name exceeds max length");
}
peer_id_t id;
std::string name;
int32_t priority{0}; // <0 (Arbiter), ==0 (Follower), >0 (F|Leader)
auto operator<=>(PGMember const& rhs) const {
return boost::uuids::hash_value(id) <=> boost::uuids::hash_value(rhs.id);
}
auto operator==(PGMember const& rhs) const { return id == rhs.id; }
};
// Ordered set of PG members. Ordering and uniqueness follow PGMember's comparison operators,
// which look at the member id only (not name or priority).
using MemberSet = std::set< PGMember >;
// Describes one replace-member operation: which replica leaves and which one takes its place.
// Returned in bulk by PGManager::list_all_replace_member_tasks().
struct replace_member_task {
std::string task_id; // Unique task id for this replace member operation
uuid_t replica_out; // The replica which is going to be replaced
uuid_t replica_in; // The replica which is going to be added in place of replica_out
};
/// Description of a placement group (PG): its id, members and sizing parameters.
///
/// Ordering and equality (`operator<=>`, `operator==`) consider the PG id only; use
/// `is_equivalent_to()` for a deeper comparison.
struct PGInfo {
    /// Creates a PGInfo for the given id with no members and zeroed sizing fields.
    explicit PGInfo(pg_id_t _id) : id(_id) {}

    pg_id_t id;
    mutable MemberSet members;
    // Value-initialised so that to_string() never reads an indeterminate uuid.
    peer_id_t replica_set_uuid{};
    // The constructor only sets `id`; these are zero-initialised so that is_equivalent_to() and
    // to_string() never read indeterminate values.
    uint64_t size{0};
    uint64_t chunk_size{0};
    // The expected member count, this is a fixed value decided by pg creation.
    uint32_t expected_member_num = 0;

    auto operator<=>(PGInfo const& rhs) const { return id <=> rhs.id; }
    bool operator==(PGInfo const& rhs) const { return id == rhs.id; }

    /// Checks whether this PGInfo has the same id, size and members as `rhs`.
    ///
    /// Members are matched through the set's lookup (i.e. by member id) and must additionally have
    /// the same priority. Member names, `chunk_size`, `replica_set_uuid` and `expected_member_num`
    /// are not compared.
    ///
    /// @param rhs The PGInfo to compare against.
    /// @return true if id, size, member count and every member's id/priority match.
    bool is_equivalent_to(PGInfo const& rhs) const {
        if (id != rhs.id || size != rhs.size || members.size() != rhs.members.size()) { return false; }
        for (auto const& mine : members) {
            auto const theirs = rhs.members.find(mine);
            if (theirs == rhs.members.end() || theirs->priority != mine.priority) { return false; }
        }
        return true;
    }

    /// Renders the PGInfo, including every member, as a single human-readable line.
    /// Members are numbered from 1 in set iteration order.
    std::string to_string() const {
        std::string members_str;
        uint32_t ordinal = 0;
        for (auto const& m : members) {
            if (ordinal > 0) { members_str += ", "; }
            ++ordinal;
            members_str += fmt::format("member-{}: id={}, name={}, priority={}", ordinal,
                                       boost::uuids::to_string(m.id), m.name, m.priority);
        }
        return fmt::format("PGInfo: id={}, replica_set_uuid={}, size={}, chunk_size={}, "
                           "expected_member_num={}, members={}",
                           id, boost::uuids::to_string(replica_set_uuid), size, chunk_size, expected_member_num,
                           members_str);
    }
};
// Per-peer information reported alongside PG statistics (PGStats::members) and
// replace-member status (PGReplaceMemberStatus::members).
struct peer_info {
peer_id_t id;
std::string name;
uint64_t last_commit_lsn{0}; // last commit lsn from this peer
// NOTE(review): presumably the time of the last successful response from this peer, in microseconds
// (going by the `_us` suffix) -- confirm the time base against the producer.
uint64_t last_succ_resp_us{0};
bool can_vote = true;
};
/// Atomic bit-set of PGStateMask flags for a PG.
///
/// All accesses use relaxed memory ordering, so individual updates are atomic but
/// impose no ordering on surrounding memory operations.
struct pg_state {
    std::atomic< uint64_t > state{0};

    /// Starts from the given raw bit pattern.
    explicit pg_state(uint64_t s) : state{s} {}

    /// Turns on the bits of `mask`. A mask with value 0 leaves the state unchanged.
    void set_state(PGStateMask mask) {
        auto const bits = static_cast< uint64_t >(mask);
        state.fetch_or(bits, std::memory_order_relaxed);
    }

    /// Turns off the bits of `mask`, leaving all other bits untouched.
    void clear_state(PGStateMask mask) {
        auto const keep = ~static_cast< uint64_t >(mask);
        state.fetch_and(keep, std::memory_order_relaxed);
    }

    /// Returns true if at least one bit of `mask` is currently set.
    /// For a mask whose value is 0 this is always false.
    bool is_state_set(PGStateMask mask) const {
        auto const bits = static_cast< uint64_t >(mask);
        auto const current = state.load(std::memory_order_relaxed);
        return (current & bits) != 0;
    }

    /// Returns the raw bit pattern.
    uint64_t get() const { return state.load(std::memory_order_relaxed); }
};
struct PGStats {
pg_id_t id;
peer_id_t replica_set_uuid;
peer_id_t leader_id; // the leader of this PG from my perspective;
uint32_t num_members; // number of members in this PG;
uint32_t total_shards; // shards allocated on this PG (including open shards)
uint32_t open_shards; // active shards on this PG;
uint32_t avail_open_shards; // total number of shards that could be opened on this PG;
uint64_t used_bytes; // total number of bytes used by all shards on this PG;
uint64_t avail_bytes; // total number of bytes available on this PG;
uint64_t num_active_objects; // total number of active objects on this PG;
uint64_t num_tombstone_objects; // total number of tombstone objects on this PG;
uint64_t pg_state; // PG state;
uint32_t snp_progress; // snapshot progress, the value is set only when the peer is under baseline resync.
uint32_t commit_quorum; // custom commit quorum size; 0 means default majority-based quorum.
std::vector< peer_info > members;
PGStats() :
id{0},
replica_set_uuid{},
leader_id{},
num_members{0},
total_shards{0},
open_shards{0},
avail_open_shards{0},
used_bytes{0},
avail_bytes{0},
num_active_objects{0},
num_tombstone_objects{0},
pg_state{0},
snp_progress{0},
commit_quorum{0},
members{} {}
std::string to_string() {
std::string members_str;
uint32_t i = 0ul;
for (auto const& m : members) {
if (i++ > 0) { members_str += ", "; };
members_str +=
fmt::format("member-{}: id={}, name={}, last_commit_lsn={}, last_succ_resp_us={}, can_vote={}", i,
boost::uuids::to_string(m.id), m.name, m.last_commit_lsn, m.last_succ_resp_us, m.can_vote);
}
return fmt::format(
"PGStats: id={}, replica_set_uuid={}, leader={}, num_members={}, total_shards={}, open_shards={}, "
"avail_open_shards={}, used_bytes={}, avail_bytes={}, members={}",
id, boost::uuids::to_string(replica_set_uuid), boost::uuids::to_string(leader_id), num_members,
total_shards, open_shards, avail_open_shards, used_bytes, avail_bytes, members_str);
}
};
// Result of PGManager::get_replace_member_status(): the task queried, its status and per-peer details.
struct PGReplaceMemberStatus {
std::string task_id;
// Defaults to UNKNOWN until a status is filled in.
PGReplaceMemberTaskStatus status = PGReplaceMemberTaskStatus::UNKNOWN;
std::vector< peer_info > members;
};
/**
 * @brief Abstract interface for managing placement groups (PGs).
 *
 * Every method is pure virtual. Errors are expressed with PGError via the Manager< PGError > base.
 *
 * NOTE(review): some methods declare default arguments on virtual functions. Default arguments are
 * resolved from the static type at the call site, so overriders should repeat the same defaults.
 */
class PGManager : public Manager< PGError > {
public:
    /**
     * @brief Creates a PG from the given description.
     *
     * @param pg_info Description of the PG to create; taken by rvalue reference.
     * @param tid The trace ID for tracking the operation (defaults to 0).
     * @return NullAsyncResult indicating the result of the operation.
     */
    virtual NullAsyncResult create_pg(PGInfo&& pg_info, trace_id_t tid = 0) = 0;

    /**
     * @brief Replaces one member of a PG with a new member.
     *
     * @param id The ID of the PG.
     * @param task_id The ID of this replace-member task.
     * @param old_member The peer ID of the member being replaced.
     * @param new_member The member that takes its place.
     * @param commit_quorum The quorum required for committing the change (defaults to 0).
     *        NOTE(review): presumably 0 selects the default majority-based quorum -- confirm.
     * @param tid The trace ID for tracking the operation (defaults to 0).
     * @return NullAsyncResult indicating the result of the operation.
     */
    virtual NullAsyncResult replace_member(pg_id_t id, std::string& task_id, peer_id_t const& old_member,
                                           PGMember const& new_member, uint32_t commit_quorum = 0,
                                           trace_id_t tid = 0) = 0;

    /**
     * @brief Queries the status of a replace-member task.
     *
     * @param id The ID of the PG.
     * @param task_id The ID of the replace-member task.
     * @param old_member The member being replaced.
     * @param new_member The member being added.
     * @param others The remaining members of the PG.
     * @param trace_id The trace ID for tracking the operation (defaults to 0).
     * @return The task status together with per-peer information.
     */
    virtual PGReplaceMemberStatus get_replace_member_status(pg_id_t id, std::string& task_id,
                                                            const PGMember& old_member, const PGMember& new_member,
                                                            const std::vector< PGMember >& others,
                                                            uint64_t trace_id = 0) const = 0;

    /**
     * Retrieves the statistics for a specific PG (Placement Group) identified by its ID.
     *
     * @param id The ID of the PG.
     * @param stats The reference to the PGStats object where the statistics will be stored.
     * @return True if the statistics were successfully retrieved, false otherwise (e.g. id not found).
     */
    virtual bool get_stats(pg_id_t id, PGStats& stats) const = 0;

    /**
     * @brief Retrieves the list of pg_ids.
     *
     * This function retrieves the list of pg_ids and stores them in the provided vector.
     *
     * @param pg_ids The vector to store the pg_ids.
     */
    virtual void get_pg_ids(std::vector< pg_id_t >& pg_ids) const = 0;

    /**
     * @brief Destroys a PG (Placement Group) identified by its ID.
     * @param pg_id The ID of the PG.
     */
    virtual void destroy_pg(pg_id_t pg_id) = 0;

    /**
     * @brief Single member exits a PG (Placement Group) identified by its ID.
     * @param group_id The group ID of the pg.
     * @param peer_id The peer ID of the member exiting the PG.
     * @param trace_id The trace identifier for logging and tracking purposes.
     * @return NullResult indicating the result of the operation.
     */
    virtual NullResult exit_pg(uuid_t group_id, peer_id_t peer_id, uint64_t trace_id) = 0;

    /**
     * @brief Set or clear the learner flag for a specified member.
     *
     * This function sets the learner flag of a given member in the PG to `is_learner`.
     * It is typically used to revert the learner flag back to false when rolling back pgmove.
     *
     * @param pg_id The ID of the PG where the member resides.
     * @param member_id The ID of the member whose learner flag is to be changed.
     * @param is_learner The new state of the learner flag (true to set as learner, false to unset).
     * @param commit_quorum The quorum required for committing the change.
     * @param trace_id The trace ID for tracking the operation.
     * @return NullAsyncResult indicating the result of the operation.
     */
    virtual NullAsyncResult flip_learner_flag(pg_id_t pg_id, peer_id_t const& member_id, bool is_learner,
                                              uint32_t commit_quorum, trace_id_t trace_id) = 0;

    /**
     * @brief Remove a member from the PG.
     *
     * This function removes a specified member from the PG, typically used to rollback the pgmove operation.
     *
     * @param pg_id The ID of the PG from which the member is to be removed.
     * @param member_id The ID of the member to be removed.
     * @param commit_quorum The quorum required for committing the removal.
     * @param trace_id The trace ID for tracking the operation.
     * @return NullAsyncResult indicating the result of the operation.
     */
    virtual NullAsyncResult remove_member(pg_id_t pg_id, peer_id_t const& member_id, uint32_t commit_quorum,
                                          trace_id_t trace_id) = 0;

    /**
     * @brief Clean up the replace member task in the PG.
     *
     * This function cleans up the replace member task, typically used to rollback the pgmove operation.
     *
     * @param pg_id The ID of the PG where the task is to be cleaned.
     * @param task_id The ID of the task to be cleaned.
     * @param commit_quorum The quorum required for committing the task cleanup.
     * @param trace_id The trace ID for tracking the operation.
     * @return NullAsyncResult indicating the result of the operation.
     */
    virtual NullAsyncResult clean_replace_member_task(pg_id_t pg_id, std::string& task_id, uint32_t commit_quorum,
                                                      trace_id_t trace_id) = 0;

    /**
     * @brief List all replace member tasks happening on this homeobject instance.
     *
     * This function retrieves a list of all ongoing tasks on this homeobject instance.
     *
     * @param trace_id The trace ID for tracking the operation.
     * @return Result containing a vector of replace member tasks.
     */
    virtual Result< std::vector< replace_member_task > > list_all_replace_member_tasks(trace_id_t trace_id) = 0;
};
} // namespace homeobject