-
-
Notifications
You must be signed in to change notification settings - Fork 942
Description
It looks like RubyHash::marshalTo implementation has threading issues. It first saves hash size and then iterates over entries. When different thread modify the hash by adding entries this could result in broken dump. The test below could show what I mean.
We have two classes:
Test class holds a "hash" and an "obj" attributes.
Value class used for values stored in "hash" and in "obj" attribute. "obj" attribute in Test always holds the same Value.
When we start a thread that populate hash and begin marshal load/dump in main thread.
Eventually it will print "Wrong dump " if object attribute will not store Value object but some FixNum instance.
class ::Test
attr_reader :hash, :obj
def initialize
@hash={}
@obj= ::Value.new(0,-2)
end
end
class ::Value
attr_reader :j
def initialize i, j
@i=i
@j=j
end
def to_s
"#{@i} #{@j}"
end
end
test = ::Test.new
hash_size=200000
dump_count=100
class ::Test
def initialize
@hash={}
@obj= ::Value.new(0,-2)
end
def obj
@obj
end
def hash
@hash
end
def marshal_dump
[@hash, @obj]
end
def marshal_load arr
@hash = arr[0]
@obj = arr[1]
end
end
class ::Value
def initialize i, j
@i=i
@j=j
end
def j
@j
end
def to_s
"#{@i} #{@j}"
end
end
test = ::Test.new
hash_size=300000
dump_count=50
sample_count = dump_count/10
Thread.new do
hash_size.times do |i|
test.hash[i] = ::Value.new(i,1)
end
end
stop = false
dump_count.times do |i|
next if stop
d = Marshal.dump test
t= Marshal.load(d);
size = t.hash.size
puts "sample: #{i} #{size} '#{test.hash[size-1]}'" if i.divmod(sample_count)[1]==0
puts "Wrong dump #{t.obj} #{t.obj.class}" unless t.obj.instance_of? ::Value
stop = t.hash.size == hash_size
end
One could say that we should not share objects between threads, without proper locking, but still better behavior would be to behave like RubyArray::marshalTo throwing concurrent modification exception in almost the same cases. A patch (for 1.7.4 code base) could be as below:
Index: src/org/jruby/RubyHash.java
===================================================================
--- src/org/jruby/RubyHash.java (date 1381739421000)
+++ src/org/jruby/RubyHash.java (revision )
@@ -626,16 +626,25 @@
}
public void visitAll(Visitor visitor) {
+ visitLimited(visitor, -1);
+ }
+
+ private void visitLimited(Visitor visitor, long size) {
- int startGeneration = generation;
+ int startGeneration = generation;
- for (RubyHashEntry entry = head.nextAdded; entry != head; entry = entry.nextAdded) {
+ long count = size;
+ for (RubyHashEntry entry = head.nextAdded; entry != head && count != 0; entry = entry.nextAdded) {
+ count--;
- if (startGeneration != generation) {
- startGeneration = generation;
- entry = head.nextAdded;
- if (entry == head) break;
- }
- if (entry.isLive()) visitor.visit(entry.key, entry.value);
- }
+ if (startGeneration != generation) {
+ startGeneration = generation;
+ entry = head.nextAdded;
+ if (entry == head) break;
+ }
+ if (entry.isLive()) visitor.visit(entry.key, entry.value);
+ }
+ // it does not handle all concurrent modification cases,
+ // but at least provdes correct marshal
+ if(count > 0) concurrentModification();
- }
+ }
/* ============================
* End of hash internals
@@ -1949,9 +1958,10 @@
// to totally change marshalling to work with overridden core classes.
public static void marshalTo(final RubyHash hash, final MarshalStream output) throws IOException {
output.registerLinkTarget(hash);
- output.writeInt(hash.size);
+ int hashSize = hash.size;
+ output.writeInt(hashSize);
try {
- hash.visitAll(new Visitor() {
+ hash.visitLimited(new Visitor() {
public void visit(IRubyObject key, IRubyObject value) {
try {
output.dumpObject(key);
@@ -1960,7 +1970,7 @@
throw new VisitorIOException(e);
}
}
- });
+ }, hashSize);
} catch (VisitorIOException e) {
throw (IOException)e.getCause();
}