z, ? | toggle help (this) |
space, → | next slide |
shift-space, ← | previous slide |
d | toggle debug mode |
## <ret> | go to slide # |
c, t | table of contents (vi) |
f | toggle footer |
r | reload slides |
n | toggle notes |
p | run preshow |
P | toggle pause |
I am a math student and programming enthusiast constantly seeking improvement.
d2
: a library-based approach
dyno
: a dynamic analysis library
mutex A, B;
thread t1([&] {
scoped_lock a(A);
scoped_lock b(B);
});
thread t2([&] {
scoped_lock b(B);
scoped_lock a(A);
});
// Circular lock-order example with three threads: t1 acquires A then B,
// t2 acquires B then C, and t3 acquires C then A. The pairwise orders form
// a cycle (A -> B -> C -> A), so an unlucky schedule can leave each thread
// holding one lock while waiting for the next: a deadlock.
mutex A, B, C;
thread t1([&] {
scoped_lock a(A); // holds A ...
scoped_lock b(B); // ... while waiting for B
});
thread t2([&] {
scoped_lock b(B); // holds B ...
scoped_lock c(C); // ... while waiting for C
});
thread t3([&] {
scoped_lock c(C); // holds C ...
scoped_lock a(A); // ... while waiting for A
});
Thread scheduling usually varies very little between runs of the same code under the same conditions. For this reason, odds are that rare deadlocks still make it to production and only happen under "extreme" conditions.
d2
: a library-based approach
d2
needs to record 4 types of events
boost::BasicLockable
boost::Lockable
boost::TimedLockable
// Minimal lockable interface expected by d2: a type with lock()/unlock()
// (the boost::BasicLockable concept). Method bodies are elided here.
class untracked_mutex {
public:
void lock();
void unlock();
};
// Wrapping the untracked type in d2::basic_lockable yields a drop-in
// replacement whose acquires/releases are reported to d2.
typedef d2::basic_lockable<untracked_mutex> mutex;
d2::recursive_basic_lockable
d2::recursive_lockable
d2::recursive_timed_lockable
// Intrusive alternative: instead of wrapping, a lock type can inherit from
// d2::trackable_sync_object and notify d2 itself from its own methods.
// d2::non_recursive tags the lock as non-recursive for the analysis.
class mutex
: d2::trackable_sync_object<d2::non_recursive>
{
public:
void some_method_to_lock() {
// normal code
this->notify_lock(); // tell d2 this lock was just acquired
}
void some_method_to_unlock() {
// normal code
this->notify_unlock(); // tell d2 this lock was just released
}
};
// Non-intrusive thread wrapping: d2::standard_thread decorates a type with
// a std::thread-like interface so starts/joins are reported to d2.
class untracked_thread {
// ...
};
typedef d2::standard_thread<untracked_thread> thread;
// Intrusive thread tracking (start): wrap the thread function before
// launching it, so d2 can observe when the child thread begins executing.
class thread : d2::trackable_thread<thread> {
public:
template <typename F, typename ...Args>
void some_method_to_start(F&& f, Args&& ...args) {
typedef d2::thread_function<F> F_;
F_ f_ = this->get_thread_function(f); // d2-aware wrapper around f
// normal code using F_ and f_
}
};
// Intrusive thread tracking (join): report joins so d2 can build the
// happens-before (segmentation) relation between threads.
class thread : d2::trackable_thread<thread> {
public:
void some_method_to_join() {
// normal code
this->notify_join(); // tell d2 this thread was joined
}
};
// Intrusive thread tracking (detach): report detaches as well, so d2's
// view of thread lifetimes stays consistent.
class thread : d2::trackable_thread<thread> {
public:
void some_method_to_detach() {
// normal code
this->notify_detach(); // tell d2 this thread was detached
}
};
thread(thread&& other);
thread& operator=(thread&& other);
friend void swap(thread& a, thread& b);
// Lowest-level event API: the functions d2 ultimately needs called when no
// higher-level wrapper fits. Thread and lock identifiers are opaque
// std::size_t values chosen by the caller.
// Fix: the original declarations had no return type, which is ill-formed
// C++ (implicit int is not part of the language); they are notifications,
// so they return void.
namespace d2 { namespace core {
void notify_acquire(std::size_t thread, std::size_t lock);
void notify_release(std::size_t thread, std::size_t lock);
void notify_recursive_acquire(std::size_t thread, std::size_t lock);
void notify_recursive_release(std::size_t thread, std::size_t lock);
void notify_start(std::size_t parent, std::size_t child);
void notify_join(std::size_t parent, std::size_t child);
}}
$ cat my_program/1 # thread 1
22 serialization::archive 10 0 0 4 0 0 0 0 1 0 0 2 0 0 0 0 0 1 0 0 2 0 0 0 0 0 0 0 1 3 0 0 0 1 4 0 0 0 1 5 0 0 0 1 6 0 0 0 1 7 0 0 0 1 8 0 0 0 1 9 0 0 0 1 10 0 0 0 1 11 0 0 0 1 12 0 0 0 1 13 0 0 0 1 14 0 0 0 1 15 0 0 0 1 16 0 0 0 1 17 0 0 0 1 18 0 0 0 1 19 0 0 0 1 20 0 0 0 1 21 0 0 0 1 22 0 0 0 1 23 0 0 0 1 24 0 0 0 1 25 0 0 0 1 26 0 0 0 1 27 0 0 0 1 28 0 0 0 1 29 0 0 0 1 30 0 0 0 1 31 0 0 0 1 32 0 0 0 1 33 0 0 0 1 34 0 0 0 1 35 0 0 0 1 36 0 0 0 1 37 0 0 0 1 38 0 0 0 1 39 0 0 0 1 40 0 0 0 1 41 0 0 0 1 42 0 0 0 1 43 0 0 0 1 44 0 0 0 1 45 0 0 0 1 46 0 0 0 1 47 0 0 0 1 48 0 0 0 1 49 0 0 0 1 50 0 0 0 1 51 0 0 0 1 52 0 0 0 1 53 0 0 0 1 54 0 0 0 1 55 0 0 0 1 56 0 0 0 1 57 0 0 0 1 58 0 0 0 1 59 0 0 0 1 60 0 0 0 1 61 0 0 0 1 62 0 0 0 1 63 0 0 0 1 64 0 0 0 1 65 0 0 0 1 66 0 0 0 1 67 0 0 0 1 68 0 0 0 1 69 0 0 0 1 70 0 0 0 1 71 0 0 0 1 72 0 0 0 1 73 0 0 0 1 74 0 0 0 1 75 0 0 0 1 76 0 0 0 1 77 0 0 0 1 78 0 0 0 1 79 0 0 0 1 80 0 0 0 1 81 0 0 0 1 82 0 0 0 1 83 0 0 0 1 84 0 0 0 1 85 0 0 0 1 86 0 0 0 1 87 0 0 0 1 88 0 0 0 1 89 0 0 0 1 90 0 0 0 1 91 0 0 0 1 92 0 0 0 1 93 0 0 0 1 94 0 0 0 1 95 0 0 0 1 96 0 0 0 1 97 0 0 0 1 98 0 0 0 1 99 0 0 0 1 100 0 0 0 1 101 0 0 1 0 0 1 101 0 0 1 1 100 0 0 1 1 99 0 0 1 1 98 0 0 1 1 97 0 0 1 1 96 0 0 1 1 95 0 0 1 1 94 0 0 1 1 93 0 0 1 1 92 0 0 1 1 91 0 0 1 1 90 0 0 1 1 89 0 0 1 1 88 0 0 1 1 87 0 0 1 1 86 0 0 1 1 85 0 0 1 1 84 0 0 1 1 83 0 0 1 1 82 0 0 1 1 81 0 0 1 1 80 0 0 1 1 79 0 0 1 1 78 0 0 1 1 77 0 0 1 1 76 0 0 1 1 75 0 0 1 1 74 0 0 1 1 73 0 0 1 1 72 0 0 1 1 71 0 0 1 1 70 0 0 1 1 69 0 0 1 1 68 0 0 1 1 67 0 0 1 1 66 0 0 1 1 65 0 0 1 1 64 0 0 1 1 63 0 0 1 1 62 0 0 1 1 61 0 0 1 1 60 0 0 1 1 59 0 0 1 1 58 0 0 1 1 57 0 0 1 1 56 0 0 1 1 55 0 0 1 1 54 0 0 1 1 53 0 0 1 1 52 0 0 1 1 51 0 0 1 1 50 0 0 1 1 49 0 0 1 1 48 0 0 1 1 47 0 0 1 1 46 0 0 1 1 45 0 0 1 1 44 0 0 1 1 43 0 0 1 1 42 0 0 1 1 41 0 0 1 1 40 0 0 1 1 39 0 0 1 1 38 0 0 1 1 37 0 0 1 1 36 0 0 1 1 35 0 0 1 1 34 0 0 1 1 33 0 0 1 1 32 0 0 1 1 31 0 0 1 1 30 0 0 1 1 29 0 0 1 1 28 0 0 1 1 27 0 0 1 1 
26 0 0 1 1 25 0 0 1 1 24 0 0 1 1 23 0 0 1 1 22 0 0 1 1 21 0 0 1 1 20 0 0 1 1 19 0 0 1 1 18 0 0 1 1 17 0 0 1 1 16 0 0 1 1 15 0 0 1 1 14 0 0 1 1 13 0 0 1 1 12 0 0 1 1 11 0 0 1 1 10 0 0 1 1 9 0 0 1 1 8 0 0 1 1 7 0 0 1 1 6 0 0 1 1 5 0 0 1 1 4 0 0 1 1 3 0 0 1 1 2 0 0
d2tool loads the events, constructs the graphs and performs the analysis. Also mention that this output was cropped and edited a bit for clarity.
d2tool
speaks that mumbo jumbo
$ d2tool --analyze myprogram
in thread #X started at [location]:
holds object #Y acquired at [location]
holds object #Z acquired at [location]
...
tries to acquire object #W at [location]
in thread #XX started at [location]:
holds object #YY acquired at [location]
holds object #ZZ acquired at [location]
...
tries to acquire object #WW at [location]
$ d2tool --analyze myprogram
in thread #2 started at [no location information]:
holds object #1 acquired at
[...]/scenario_ABBA main::$_1::operator()() const
[...]/scenario_ABBA boost::detail::function::void_function_obj_invoker0<main::$_1, void>::invoke(boost::detail::function::function_buffer&)
[...]/scenario_ABBA boost::function0<void>::operator()() const
[...]/scenario_ABBA d2mock::thread::impl::impl(boost::function<void ()> const&)::'lambda'()::operator()() const
[...]/scenario_ABBA d2::thread_function<d2mock::thread::impl::impl(boost::function<void ()> const&)::'lambda'()>::result<d2::thread_function<d2mock::thread::impl::impl(boost::function<void ()> const&)::'lambda'()> ()>::type d2::thread_function<d2mock::thread::impl::impl(boost::function<void ()> const&)::'lambda'()>::operator()<>()
[...]/scenario_ABBA boost::detail::thread_data<d2::thread_function<d2mock::thread::impl::impl(boost::function<void ()> const&)::'lambda'()> >::run()
[...]/libboost_thread-mt.dylib thread_proxy
[...]/libsystem_c.dylib _pthread_start
[...]/libsystem_c.dylib thread_start
I am not the author of the algorithm. It is presented in:
"Detection of deadlock potentials in multithreaded programs"
IBM Journal of Research and Development, vol.54, no.5, pp.3:1,3:15
Sept.-Oct. 2010
For brevity, unlocking mutexes and joining threads will often be omitted.
When omitted, assume the mutexes are unlocked in reverse order of locking
and threads are joined in reverse order of starting.
mutex A, B;
A.lock();
B.lock();
B.unlock();
A.unlock();
mutex A, B;
A.lock();
B.lock();
thread t1([] {});
thread t2([] {});
t2.join();
t1.join();
thread t1([] {});
thread t2([] {});
Specify that we'll be adding annotation on the graph to encode more information like segmentation and gatelocks. Precise that we'll start with the most basic graph and add annotations as we go to improve the algorithm.
u
to v
means that a thread acquired v
while holding u
Two nodes are created because we created two locks, but no edges are created because we did not lock anything.
mutex A, B;
No edge is created; the main thread does not hold anything when it acquires A
.
mutex A, B;
A.lock();
The main thread holds A
when it acquires B
; we add an edge from A
to B
.
mutex A, B;
A.lock();
B.lock();
The graph is not modified on releases.
mutex A, B;
A.lock();
B.lock();
B.unlock();
A.unlock();
Since there is already an edge from A to B in the graph, we don't add it redundantly if a thread locks B again while holding A.
We don't add redundant edges.
mutex A, B;
A.lock();
B.lock();
B.unlock();
A.unlock();
A.lock();
B.lock();
mutex A, B, C, D;
A.lock();
B.lock();
We're really computing the transitive closure of the "is held by a thread when acquiring X" relation.
mutex A, B, C, D;
A.lock();
B.lock();
C.lock();
We're really computing the transitive closure of the "is held by a thread when acquiring X" relation.
mutex A, B, C, D;
A.lock();
B.lock();
C.lock();
D.lock();
mutex A, B;
thread t1([&] {
A.lock();
B.lock();
});
// ...
Clearly, there is a cycle in the graph iff two locks were acquired in some order and then acquired in a different order.
mutex A, B;
thread t1([&] {
A.lock();
B.lock();
});
thread t2([&] {
B.lock();
A.lock();
});
mutex A, B, C, D;
thread t1([&]{
A.lock();
B.lock();
C.lock();
D.lock();
});
// ...
mutex A, B, C, D;
thread t1([&]{
A.lock();
B.lock();
C.lock();
D.lock();
});
thread t2([&]{
D.lock();
B.lock();
});
mutex A, B, C;
thread t1([&] {
A.lock();
B.lock();
});
thread t2([&] {
B.lock();
C.lock();
});
thread t3([&] {
C.lock();
A.lock();
});
Since there is only one thread involved, a deadlock can't possibly happen. Actually, the generalization is that all the code in this thread is implicitly serialized (because it is run in a single thread of execution). Therefore, there exists an implicit happens-before relationship between the statements. Note that a deadlock could still happen if a non-recursive lock was locked recursively by a thread. However, whether such a deadlock happens depends on whether the code path leading to it is taken. In other words, it is deterministic as far as thread scheduling is concerned. If the code path is taken, the deadlock is 100% certain to happen. Otherwise, the deadlock won't happen and we won't detect anything anyway since the code path was not taken (and we're doing dynamic analysis).
mutex A, B;
thread t1([&] {
A.lock();
B.lock();
B.unlock();
A.unlock();
B.lock();
A.lock();
A.unlock();
B.unlock();
});
If a cycle contains two edges labelled with the same thread, it is ignored because the represented deadlock would require code in the same thread to run concurrently, which is impossible. This is effectively a special case of the happens-before relationship.
Let's augment the lock graph by labelling each edge with the thread that caused that edge to be added.
We will ignore cycles containing two edges labelled with the same thread.
No labels, no edges, like the basic graph.
mutex A, B;
// ...
When an edge is added, we label it with the thread that caused its addition.
mutex A, B;
thread t1([&] {
A.lock();
B.lock();
});
// ...
We add a parallel edge if the label on it is different from that of existing edges.
mutex A, B;
thread t1([&] {
A.lock();
B.lock();
});
thread t2([&] {
A.lock();
B.lock();
});
Consider the previous graph with a false positive. We now ignore the single threaded cycle:
mutex A, B;
thread t1([&] {
A.lock();
B.lock();
B.unlock();
A.unlock();
B.lock();
A.lock();
});
The A->B->C cycle with three different threads is a real potential deadlock. The other A->B->C cycle, with t1, t2, t2, is a false positive and is ignored because t2 appears twice in the cycle.
And cycles still represent potential deadlocks.
mutex A, B, C;
thread t1([&] {
A.lock();
B.lock();
});
thread t2([&] {
B.lock();
C.lock();
C.unlock();
B.unlock();
C.lock();
A.lock();
});
thread t3([&] {
C.lock();
A.lock();
});
The deadlock may never happen because G has to be held by both threads in order to enter the dangerous section of the code.
However, consider this situation. Both threads must be holding G
, which
is impossible:
mutex A, B, G;
thread t1([&] {
G.lock();
A.lock();
B.lock();
});
thread t2([&] {
G.lock();
B.lock();
A.lock();
});
G
is a 'gatelock' protecting that cycle
This time, we will augment the edge labels to record the set of locks held by the thread causing an edge to be added to the lock graph.
A cycle is not valid if the gatelock sets of any two edges in the cycle intersect, i.e. if they share one or more gatelocks.
mutex A, B, C, D;
// ...
t1
acquires A
while holding nothing
mutex A, B, C, D;
thread t1([&] {
A.lock();
// ...
});
// ...
It is NOT redundant to put A
in the set of gatelocks, as can be seen
in the next slide.
t1
acquires B
while holding A
; A
is put in the gatelocks for that edge
mutex A, B, C, D;
thread t1([&] {
A.lock();
B.lock();
// ...
});
// ...
Here, we can see that putting B
in the set of gatelocks when
acquiring C
is not redundant, because the edge from A
to C
needs it.
t1
acquires C
while holding A
and B
; all the edges that are added to
the graph are marked with these gatelocks
mutex A, B, C, D;
thread t1([&] {
A.lock();
B.lock();
C.lock();
});
// ...
t2
acquires D
while holding nothing
mutex A, B, C, D;
thread t1([&] {
A.lock();
B.lock();
C.lock();
});
thread t2([&] {
D.lock();
// ...
});
t2
acquires A
while holding D
mutex A, B, C, D;
thread t1([&] {
A.lock();
B.lock();
C.lock();
});
thread t2([&] {
D.lock();
A.lock();
// ...
});
t2
acquires B
while holding D
and A
mutex A, B, C, D;
thread t1([&] {
A.lock();
B.lock();
C.lock();
});
thread t2([&] {
D.lock();
A.lock();
B.lock();
});
As expected, the false positive is inhibited by the intersecting sets of gatelocks.
t1
and t2
will never run in parallel:
mutex A, B;
thread t1([&] {
A.lock();
B.lock();
});
t1.join();
thread t2([&] {
B.lock();
A.lock();
});
t2.join();
t1
'happens-before' t2
.
We can implement this relation by associating an identifier with segments of the code that are separated by the start or join of a thread.
When a thread starts another thread, both the parent and the child threads are assigned new segment identifiers.
When a thread joins another thread, the parent thread continues executing with a new segment identifier.
By drawing directed edges between the segments, we end up with a graph where
node v
is reachable from node u
iff u
happens before v
.
If neither of two acquires happens before the other, then they may happen in parallel.
main
starts in segment 0
// ...
main
starts t1
; main
and t1
get new segments
thread t1([] {});
// ...
main
starts t2
; main
and t2
get new segments
thread t1([] {});
thread t2([] {
// ...
});
// ...
t2
starts t3
; t2
and t3
get new segments
thread t1([] {});
thread t2([] {
thread t3([] {});
// ...
});
// ...
t2
joins t3
; t2
continues in a new segment
thread t1([] {});
thread t2([] {
thread t3([] {});
t3.join();
});
// ...
main
joins t1
; main
continues in a new segment
thread t1([] {});
thread t2([] {
thread t3([] {});
t3.join();
});
t1.join();
// ...
main
joins t2
; main
continues in a new segment
thread t1([] {});
thread t2([] {
thread t3([] {});
t3.join();
});
t1.join();
t2.join();
Let's augment the lock graph by recording the segment in which an acquire is made.
A cycle is not valid if any edge in the cycle happens before another edge in the cycle.
Let's go back to our false positive:
mutex A, B;
thread t1([&] {
A.lock();
B.lock();
});
t1.join();
thread t2([&] {
B.lock();
A.lock();
});
main
starts in segment 0
mutex A, B;
main
starts t1
; main
and t1
get new segments
mutex A, B;
thread t1([&] {
// ...
});
t1
acquires A
and then B
, both in segment 2
mutex A, B;
thread t1([&] {
A.lock();
B.lock();
});
main
joins t1
; main
continues in a new segment
mutex A, B;
thread t1([&] {
A.lock();
B.lock();
});
t1.join();
main
starts t2
; main
and t2
get new segments
mutex A, B;
thread t1([&] {
A.lock();
B.lock();
});
t1.join();
thread t2([&] {
// ...
});
t2
locks B
and then A
, both in segment 4
mutex A, B;
thread t1([&] {
A.lock();
B.lock();
});
t1.join();
thread t2([&] {
B.lock();
A.lock();
});
main
joins t2
; main
continues in a new segment
mutex A, B;
thread t1([&] {
A.lock();
B.lock();
});
t1.join();
thread t2([&] {
B.lock();
A.lock();
});
t2.join();
The cycle will be ignored because segment 2 happens before segment 4.
u
to v
means that a thread acquired v
while holding u
starts and joins
u
to v
means that u
happens before v
mutex A, B;
thread t1([&] {
A.lock();
B.lock();
B.unlock();
A.unlock();
B.lock();
A.lock();
A.unlock();
B.unlock();
});
mutex A, B, G;
thread t1([&] {
G.lock();
A.lock();
B.lock();
B.unlock();
A.unlock();
G.unlock();
});
thread t2([&] {
G.lock();
B.lock();
A.lock();
A.unlock();
B.unlock();
G.unlock();
});
mutex A, B;
thread t1([&] {
A.lock();
B.lock();
B.unlock();
A.unlock();
});
t1.join();
thread t2([&] {
B.lock();
A.lock();
A.unlock();
B.unlock();
});
dyno
: a dynamic analysis library
namespace tags {
struct acquire;
struct lock_id;
}
// Declarative event description: an 'acquire' event records the call stack,
// the acquiring thread's id, and a user-supplied lock id (an unsigned value
// keyed by tags::lock_id).
typedef event<tags::acquire,
records<call_stack>,
records<thread_id>,
records<
custom_info<tags::lock_id, unsigned>
>
> acquire_event;
framework
// A framework bundles the set of event types with a backend deciding where
// generated events go (here: saved on the filesystem).
typedef framework<
events<
acquire_event, release_event,
start_event, join_event
>,
backend<save_on_filesystem>
> d2_framework_t;
static d2_framework_t d2_framework; // single global sink for d2's events
// Register a handler invoked whenever an 'acquire' event is generated on
// this framework.
dyno::on<tags::acquire>(d2_framework,
[](acquire_event e) {
// whatever
});
// Instrumented lock type: in this example generating the event is all that
// lock() does — the actual locking code is elided for brevity.
struct mutex {
void lock() {
dyno::generate<tags::acquire>(d2_framework, lock_id_);
}
private:
unsigned lock_id_; // caller-chosen identifier recorded with each event
};
// Offline consumption: a visitor with one overload per interesting event
// type, plus a templated catch-all for events it ignores.
struct populate_lock_graph {
void operator()(acquire_event e) const;
void operator()(release_event e) const;
template <typename AnyOtherEvent>
void operator()(AnyOtherEvent) const;
};
// Replay the events previously saved by the filesystem backend.
dyno::load_events("some_directory", populate_lock_graph());
Explain how the project was started by witnessing that much of the code from d2 could be generally useful for dynamic analysis.
d2