Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions sled-agent/src/http_entrypoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,18 @@ async fn services_put(
// times out) could result in leaving zones partially configured and the
// in-memory state of the service manager invalid. See:
// oxidecomputer/omicron#3098.
match tokio::spawn(async move { sa.services_ensure(body_args).await }).await
{
let handler = async move {
match sa.services_ensure(body_args).await {
Ok(()) => Ok(()),
Err(e) => {
// Log the error here to make things clear even if the client
// has already disconnected.
error!(sa.logger(), "failed to initialize services: {e}");
Err(e)
}
}
};
match tokio::spawn(handler).await {
Ok(result) => result.map_err(|e| Error::from(e))?,

Err(e) => {
Expand Down
96 changes: 65 additions & 31 deletions sled-agent/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ use sled_hardware::underlay::BOOTSTRAP_PREFIX;
use sled_hardware::Baseboard;
use sled_hardware::SledMode;
use slog::Logger;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::HashSet;
use std::io::Cursor;
Expand Down Expand Up @@ -362,7 +363,7 @@ pub struct ServiceManagerInner {
switch_zone_maghemite_links: Vec<PhysicalLink>,
sidecar_revision: SidecarRevision,
// Zones representing running services
zones: Mutex<Vec<RunningZone>>,
zones: Mutex<BTreeMap<String, RunningZone>>,
underlay_vnic_allocator: VnicAllocator<Etherstub>,
underlay_vnic: EtherstubVnic,
bootstrap_vnic_allocator: VnicAllocator<Etherstub>,
Expand Down Expand Up @@ -431,7 +432,7 @@ impl ServiceManager {
time_synced: AtomicBool::new(false),
sidecar_revision,
switch_zone_maghemite_links,
zones: Mutex::new(vec![]),
zones: Mutex::new(BTreeMap::new()),
underlay_vnic_allocator: VnicAllocator::new(
"Service",
underlay_etherstub,
Expand Down Expand Up @@ -1908,7 +1909,7 @@ impl ServiceManager {
// Populates `existing_zones` according to the requests in `services`.
async fn initialize_services_locked(
&self,
existing_zones: &mut Vec<RunningZone>,
existing_zones: &mut BTreeMap<String, RunningZone>,
requests: &Vec<ZoneRequest>,
) -> Result<(), Error> {
if let Some(name) = requests
Expand All @@ -1923,16 +1924,17 @@ impl ServiceManager {
});
}

// We initialize all the zones we can, but only return the first error.
// We initialize all the zones we can, but only return one error, if
// any.
let local_existing_zones = Arc::new(Mutex::new(existing_zones));
let first_err = Arc::new(Mutex::new(None));
let last_err = Arc::new(Mutex::new(None));
stream::iter(requests)
// WARNING: Do not use "try_for_each_concurrent" here -- if you do,
// it's possible that the future will cancel other ongoing requests
// to "initialize_zone".
.for_each_concurrent(None, |request| {
let local_existing_zones = local_existing_zones.clone();
let first_err = first_err.clone();
let last_err = last_err.clone();
async move {
match self
.initialize_zone(
Expand All @@ -1943,20 +1945,20 @@ impl ServiceManager {
.await
{
Ok(running_zone) => {
local_existing_zones
.lock()
.await
.push(running_zone);
local_existing_zones.lock().await.insert(
running_zone.name().to_string(),
running_zone,
);
}
Err(err) => {
*first_err.lock().await = Some(err);
*last_err.lock().await = Some(err);
}
}
}
})
.await;

if let Some(err) = Arc::into_inner(first_err)
if let Some(err) = Arc::into_inner(last_err)
.expect("Should have last reference")
.into_inner()
{
Expand Down Expand Up @@ -2245,9 +2247,7 @@ impl ServiceManager {
.map_err(Error::from);
}
}
if let Some(zone) =
self.inner.zones.lock().await.iter().find(|z| z.name() == name)
{
if let Some(zone) = self.inner.zones.lock().await.get(name) {
return self
.create_zone_bundle_impl(zone)
.await
Expand Down Expand Up @@ -2424,9 +2424,14 @@ impl ServiceManager {
{
zone_names.push(String::from(zone.name()))
}
for zone in self.inner.zones.lock().await.iter() {
zone_names.push(String::from(zone.name()));
}
zone_names.extend(
self.inner
.zones
.lock()
.await
.values()
.map(|zone| zone.name().to_string()),
);
zone_names.sort();
Ok(zone_names)
}
Expand Down Expand Up @@ -2480,7 +2485,7 @@ impl ServiceManager {
// re-instantiated on boot.
async fn ensure_all_services(
&self,
existing_zones: &mut MutexGuard<'_, Vec<RunningZone>>,
existing_zones: &mut MutexGuard<'_, BTreeMap<String, RunningZone>>,
old_request: &AllZoneRequests,
request: ServiceEnsureBody,
) -> Result<AllZoneRequests, Error> {
Expand All @@ -2502,11 +2507,7 @@ impl ServiceManager {
// Destroy zones that should not be running
for zone in zones_to_be_removed {
let expected_zone_name = zone.zone_name();
if let Some(idx) = existing_zones
.iter()
.position(|z| z.name() == expected_zone_name)
{
let mut zone = existing_zones.remove(idx);
if let Some(mut zone) = existing_zones.remove(&expected_zone_name) {
if let Err(e) = zone.stop().await {
error!(log, "Failed to stop zone {}: {e}", zone.name());
}
Expand All @@ -2520,6 +2521,36 @@ impl ServiceManager {
let all_u2_roots =
self.inner.storage.all_u2_mountpoints(ZONE_DATASET).await;
for zone in zones_to_be_added {
// Check if we think the zone should already be running
let name = zone.zone_name();
if existing_zones.contains_key(&name) {
// Make sure the zone actually exists in the right state too
match Zones::find(&name).await {
Ok(Some(zone)) if zone.state() == zone::State::Running => {
info!(log, "skipping running zone"; "zone" => &name);
continue;
}
_ => {
// Mismatch between SA's view and reality, let's try to
// clean up any remanants and try initialize it again
warn!(
log,
"expected to find existing zone in running state";
"zone" => &name,
);
if let Err(e) =
existing_zones.remove(&name).unwrap().stop().await
{
error!(
log,
"Failed to stop zone";
"zone" => &name,
"error" => %e,
);
}
Comment on lines +2533 to +2550

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call to do this cleanup here

}
}
}
// For each new zone request, we pick an arbitrary U.2 to store
// the zone filesystem. Note: This isn't known to Nexus right now,
// so it's a local-to-sled decision.
Expand Down Expand Up @@ -2552,7 +2583,7 @@ impl ServiceManager {
pub async fn cockroachdb_initialize(&self) -> Result<(), Error> {
let log = &self.inner.log;
let dataset_zones = self.inner.zones.lock().await;
for zone in dataset_zones.iter() {
for zone in dataset_zones.values() {
// TODO: We could probably store the ZoneKind in the running zone to
// make this "comparison to existing zones by name" mechanism a bit
// safer.
Expand Down Expand Up @@ -2608,7 +2639,10 @@ impl ServiceManager {
Ok(())
}

pub fn boottime_rewrite(&self, zones: &Vec<RunningZone>) {
pub fn boottime_rewrite<'a>(
&self,
zones: impl Iterator<Item = &'a RunningZone>,
) {
if self
.inner
.time_synced
Expand All @@ -2626,7 +2660,6 @@ impl ServiceManager {
info!(self.inner.log, "Setting boot time to {:?}", now);

let files: Vec<Utf8PathBuf> = zones
.iter()
.map(|z| z.root())
.chain(iter::once(Utf8PathBuf::from("/")))
.flat_map(|r| [r.join("var/adm/utmpx"), r.join("var/adm/wtmpx")])
Expand Down Expand Up @@ -2655,7 +2688,7 @@ impl ServiceManager {

if let Some(true) = self.inner.skip_timesync {
info!(self.inner.log, "Configured to skip timesync checks");
self.boottime_rewrite(&existing_zones);
self.boottime_rewrite(existing_zones.values());
return Ok(TimeSync { sync: true, skew: 0.00, correction: 0.00 });
};

Expand All @@ -2664,8 +2697,9 @@ impl ServiceManager {

let ntp_zone = existing_zones
.iter()
.find(|z| z.name().starts_with(&ntp_zone_name))
.ok_or_else(|| Error::NtpZoneNotReady)?;
.find(|(name, _)| name.starts_with(&ntp_zone_name))
.ok_or_else(|| Error::NtpZoneNotReady)?
.1;

// XXXNTP - This could be replaced with a direct connection to the
// daemon using a patched version of the chrony_candm crate to allow
Expand All @@ -2687,7 +2721,7 @@ impl ServiceManager {
&& correction.abs() <= 0.05;

if sync {
self.boottime_rewrite(&existing_zones);
self.boottime_rewrite(existing_zones.values());
}

Ok(TimeSync { sync, skew, correction })
Expand Down
6 changes: 6 additions & 0 deletions sled-agent/src/sled_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ impl SledAgentInner {
#[derive(Clone)]
pub struct SledAgent {
inner: Arc<SledAgentInner>,
log: Logger,
}

impl SledAgent {
Expand Down Expand Up @@ -351,6 +352,7 @@ impl SledAgent {
// Also, we could maybe de-dup some of the backoff code in the request queue?
nexus_request_queue: NexusRequestQueue::new(),
}),
log: log.clone(),
};

// We immediately add a notification to the request queue about our
Expand Down Expand Up @@ -475,6 +477,10 @@ impl SledAgent {
self.inner.id
}

pub fn logger(&self) -> &Logger {
&self.log
}

// Sends a request to Nexus informing it that the current sled exists.
fn notify_nexus_about_self(&self, log: &Logger) {
let sled_id = self.inner.id;
Expand Down
2 changes: 1 addition & 1 deletion tools/create_virtual_hardware.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ function ensure_zpools {
echo "Zpool: [$ZPOOL]"
VDEV_PATH="${ZPOOL_VDEV_DIR:-$OMICRON_TOP}/$ZPOOL.vdev"
if ! [[ -f "$VDEV_PATH" ]]; then
dd if=/dev/zero of="$VDEV_PATH" bs=1 count=0 seek=10G
dd if=/dev/zero of="$VDEV_PATH" bs=1 count=0 seek=15G
fi
success "ZFS vdev $VDEV_PATH exists"
if [[ -z "$(zpool list -o name | grep $ZPOOL)" ]]; then
Expand Down
2 changes: 1 addition & 1 deletion tools/destroy_virtual_hardware.sh
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ function try_destroy_zpools {
for ZPOOL in "${ZPOOLS[@]}"; do
VDEV_FILE="${ZPOOL_VDEV_DIR:-$OMICRON_TOP}/$ZPOOL.vdev"
zfs destroy -r "$ZPOOL" && \
zfs unmount "$ZPOOL" && \
(zfs unmount "$ZPOOL" || true) && \
zpool destroy "$ZPOOL" && \
rm -f "$VDEV_FILE" || \
warn "Failed to remove ZFS pool and vdev: $ZPOOL"
Expand Down