From bf2cb5637d9d0c54de842a24deaa48b0c20123ac Mon Sep 17 00:00:00 2001 From: gposluns Date: Tue, 31 Jan 2023 21:05:45 +0000 Subject: [PATCH 1/3] updated README with gcc version I used to finally get this working --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c1faf88..9139fbb 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ Setup Dependencies: - Linux on x86_64: We've tested with Ubuntu 14.04, 16.04, and 18.04. If you want to run this in a VM, see Vagrant setup below. -- GCC: Version 4.8 or newer will suffice to build the simulator itself, which +- GCC: Version >= 4.8 and <= 9.5 will suffice to build the simulator itself, which is written in C++11 and depends on a particular GCC ABI. (Clang won't work.) Test applications are written in C++14 so they can build with GCC 5+ or Clang. - Pin version 2.14: Download this from From 311800afd2991ac78a71dd178fd8b364475684c8 Mon Sep 17 00:00:00 2001 From: gposluns Date: Mon, 6 Feb 2023 01:40:59 +0000 Subject: [PATCH 2/3] fixes for ubuntu up to 22.04 --- sim/virt/virt.cpp | 33 +++++++++++++++++++++++++++++++++ tests/SConscript | 19 +++++++++++-------- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/sim/virt/virt.cpp b/sim/virt/virt.cpp index 4e6ec0f..4d3ba40 100644 --- a/sim/virt/virt.cpp +++ b/sim/virt/virt.cpp @@ -27,6 +27,7 @@ /* This file was adapted from zsim. */ #include +#include #include "sim/log.h" #include "sim/sim.h" #include "sim/virt/virt.h" @@ -57,7 +58,38 @@ bool syscallEnter(spin::ThreadId tid, spin::ThreadContext* ctxt) { uint64_t syscall = spin::getReg(ctxt, REG_RAX); DEBUG("[%d] syscall %ld", tid, syscall); + // glibc version 2.28+, if built with GCC's -fcf-protection, will have + // init_cpu_features() (which runs early on during the execution of any + // process) attempt to call the nonexisting ARCH_CET_STATUS (0x3001) + // subfunction of arch_prctl. 
See: + // https://sourceware.org/git/?p=glibc.git;a=commit;h=394df3815e8ceec750fd06583eee4896174ce808 + // This became the default in Ubuntu 19.10+. See: + // https://wiki.ubuntu.com/ToolChain/CompilerFlags#A-fcf-protection + // Pin v2.14 crashes when it sees this unexpected arch_prctl subfunction. + // Avoid the crash by just pretending to execute the syscall instruction + // while skipping over it. + if (syscall == SYS_arch_prctl && spin::getReg(ctxt, REG_RDI) == 0x3001) { + DEBUG("[%d] ignoring prtcl", tid); + spin::setReg(ctxt, REG::REG_RIP, + spin::getReg(ctxt, REG::REG_RIP) + + 2/*bytes in fast system call instruction*/); + spin::setReg(ctxt, REG::REG_RAX, + -1UL/*indicates failure of syscall, as glibc expects*/); + return false; + } + + //clone3 syscall used by glibc in Ubuntu 22.04 when spawning threads fails + //here, but glibc will fall back to clone if errno is ENOSYS + // + //So pretend to fail with this errno, similar to above + if (syscall == SYS_clone3) { + spin::setReg(ctxt, REG_RAX, -ENOSYS); + spin::setReg(ctxt, REG_RIP, spin::getReg(ctxt, REG_RIP) + 2); + return false; + } + if (!IsInFastForward()) { + DEBUG("[%d] non-ff syscall", tid); // Perform reads/writes to syscall input/output data to reflect its memory // behavior. This avoids conflicts on syscall data. if (syscall == SYS_read) { @@ -98,6 +130,7 @@ bool syscallEnter(spin::ThreadId tid, spin::ThreadContext* ctxt) { default: break; } if (keepThreadCaptured) syncSyscallTid = tid; + DEBUG("[%d] returning %d", tid, !keepThreadCaptured); return !keepThreadCaptured; } diff --git a/tests/SConscript b/tests/SConscript index 9ed3616..5d81c7a 100644 --- a/tests/SConscript +++ b/tests/SConscript @@ -19,20 +19,23 @@ env.Append(CFLAGS = ['-std=c11', '-Wstrict-prototypes']) # https://wiki.debian.org/Hardening # Let's fight the nanny distros and try to get more consistent behavior # by disabling new security-related compiler features. 
-# -# Ubuntu 14.10 and later systems enable -fstack-protector-strong by default. -# This generates extra accesses to thread-local storage to get canary values -# that are placed on the stack. We want to avoid these memory accesses, -# which incur needless conflict checks: env.Append(CPPFLAGS = ['-fno-stack-protector']) -# Ubuntu 16.10 and later systems enable PIE by default, which is incompatible -# with the local labels within inline asm in the Swarm runtime. if not GetOption('clang'): env.Append(CPPFLAGS = ['-no-pie', '-fno-PIE']) env.Append(LINKFLAGS = ['-no-pie', '-fno-PIE']) -# Ubuntu 19.10 enables -fstack-clash-protection and -fcf-protection by default. +print('Checking whether compiler supports stack-clash prevention and Intel CET...') # These flags don't exist prior to GCC 8. +testEnv = env.Clone() #env.Append(CPPFLAGS = ['-fno-stack-clash-protection', '-fcf-protection=none']) +testEnv.Append(CPPFLAGS = ['-fno-stack-clash-protection', '-fcf-protection=none']) +conf = Configure(testEnv) +flagsAreSupported = conf.CheckCXX() +testEnv = conf.Finish() +if flagsAreSupported: + env = testEnv +else: + # CheckCXX() printed a scary message upon failure. Tell the user not to worry. + print('^That failure is good. 
Continuing with build.') env.Append(LIBS = ['pthread']) From 8ebe3517b91100d2610cc9bbdccc90abdcf39905 Mon Sep 17 00:00:00 2001 From: gposluns Date: Tue, 7 Feb 2023 20:33:53 +0000 Subject: [PATCH 3/3] copied relaxed swarm implementation used for Hive paper --- sim/init/init.cpp | 2 ++ sim/rob.cpp | 7 ++++--- sim/robtypes.h | 4 ++-- sim/sim.cpp | 8 +++++--- sim/sim.h | 2 ++ sim/task.h | 2 +- 6 files changed, 16 insertions(+), 9 deletions(-) diff --git a/sim/init/init.cpp b/sim/init/init.cpp index b202d33..a06fbe8 100644 --- a/sim/init/init.cpp +++ b/sim/init/init.cpp @@ -916,6 +916,8 @@ static void InitSystem(const Config& config) { initinfo->usePreciseAddressSets = ("Precise" == std::string( config.get("sys.robs.addressSet.type", "Bloom"))); + initinfo->relaxed = config.get("sys.robs.relaxedOrder", false); + uint32_t maxFrameDepth = config.get("sys.robs.maxFrameDepth", UINT32_MAX); //FIXME(victory): Remove this hack when we have a less-broken diff --git a/sim/rob.cpp b/sim/rob.cpp index 6ab3482..8751c1b 100644 --- a/sim/rob.cpp +++ b/sim/rob.cpp @@ -1017,10 +1017,10 @@ TaskPtr ROB::removeUntiedTaskImpl(const uint64_t taskFn, // FIXME(dsm): Use multi-index::range() auto start = boost::make_reverse_iterator(runQueue.lower_bound( - std::make_tuple(std::cref(maxTS), false))); + std::make_tuple(std::cref(maxTS), 0, false))); auto end = boost::make_reverse_iterator(runQueue.lower_bound( - std::make_tuple(std::cref(minTS), false))); + std::make_tuple(std::cref(minTS), 0, false))); // For liveness: we don't want to spill the minimum task. // Spilling that task would certainly induce a swarm::requeuer(...) 
of @@ -1284,7 +1284,8 @@ std::pair ROB::taskToRun(ThreadID tid) { // Find all tasks with the same timestamp, including all producers TimeStamp ubTs = runQueue.min()->lts(); ubTs.clearTieBreaker(); - auto ub = runQueue.upper_bound(std::make_tuple(std::cref(ubTs), true)); + auto ub = runQueue.upper_bound(std::make_tuple(std::cref(ubTs), + runQueue.min()->softTs, true)); ub = std::prev(ub); // Manually check if std::distance(rq.begin(), ub) < underflow. diff --git a/sim/robtypes.h b/sim/robtypes.h index 93a073f..edc1759 100644 --- a/sim/robtypes.h +++ b/sim/robtypes.h @@ -37,11 +37,11 @@ using ExecQ_ty = fixed_capacity_ordered_set< // The RunQ sorts tasks by their timestamp and deprioritizes producers. It uses // a global_fun key extractor to allow calls to lower/upper_bound to use keys. -using RunQ_key_ty = std::tuple; +using RunQ_key_ty = std::tuple; inline RunQ_key_ty getRunQKey(const TaskPtr& t) { // Given an equal choice between running a programmer-defined producer and a // requeuer, choose the normal producer - return std::make_tuple(std::cref(t->lts()), + return std::make_tuple(std::cref(t->lts()), t->softTs, (t->isProducer() << 1) | t->isRequeuer()); } using RunQ_ty = ordered_pointer_set>, RunQ_key_ty>; diff --git a/sim/sim.cpp b/sim/sim.cpp index f218247..3cd6d4d 100644 --- a/sim/sim.cpp +++ b/sim/sim.cpp @@ -605,7 +605,8 @@ spin::ThreadId HandleSetGvtMagicOp(spin::ThreadId tid, uint64_t cycle, spin::Thr } static void DispatchTaskToContext(const Task& task, spin::ThreadContext* ctxt) { - spin::setReg(ctxt, REG::REG_RDI, task.ts.app()); + if (task.softTs) spin::setReg(ctxt, REG::REG_RDI, task.softTs); + else spin::setReg(ctxt, REG::REG_RDI, task.ts.app()); const uint32_t numArgs = task.args.size(); constexpr REG regs[] = {REG::REG_RSI, REG::REG_RDX, REG::REG_RCX, @@ -875,7 +876,7 @@ spin::ThreadId HandleEnqueueMagicOp(const uint64_t op, const bool requeuer = op & EnqFlags::REQUEUER; const bool maySpec = op & EnqFlags::MAYSPEC; const bool cantSpec = op & 
EnqFlags::CANTSPEC; - const bool isSoftPrio = op & EnqFlags::ISSOFTPRIO; + const bool isSoftPrio = op & EnqFlags::ISSOFTPRIO || ossinfo->relaxed; const bool runOnAbort = op & EnqFlags::RUNONABORT; const bool noTimestamp = op & EnqFlags::NOTIMESTAMP || runOnAbort; const bool nonSerialHint = op & EnqFlags::NONSERIALHINT; @@ -1016,13 +1017,14 @@ spin::ThreadId HandleEnqueueMagicOp(const uint64_t op, assert(curThread->tid == parent->runningTid); assert(curThread->state != BLOCKED); curThread->core->finishTask(curThread->tid); + if (ossinfo->relaxed) parent->softTs = tsApp; GetCurRob().yieldTask(parent, // Advance the requeuer's timestamp if its next minimum // child is timestamped, otherwise be conservative and reuse // the requeuer's old timestamp. // N.B. if an ordinary requeuer is yielding while enqueuing // a frame requeuer, we are really using the tsApp value. - noTimestamp ? parent->ts.app() : tsApp); + noTimestamp || isSoftPrio ? parent->ts.app() : tsApp); curThread->task = nullptr; assert(curThread->rspCheckpoint); diff --git a/sim/sim.h b/sim/sim.h index 714df7d..113d97a 100644 --- a/sim/sim.h +++ b/sim/sim.h @@ -155,6 +155,8 @@ struct GlobSimInfo { std::vector robs; std::vector tsbs; std::vector throttlers; + + bool relaxed; }; extern const GlobSimInfo* ossinfo; diff --git a/sim/task.h b/sim/task.h index a4553c4..e9c0311 100644 --- a/sim/task.h +++ b/sim/task.h @@ -165,7 +165,7 @@ class Task : public std::enable_shared_from_this { // Unordered tasks indicated as using soft priority will take on the programmer- // specified timestamp as their soft timestamp, meaning that the tasks will be // dequeued in the soft timestamp order. - const uint64_t softTs; + uint64_t softTs; // Must this task be executed speculatively? Can it be run speculatively, // but also non-speculatively when its parent commits and assuming perfect