Skip to content

incorrect RubyHash multithreaded behaviour when marshalling #1182

@uujava

Description

@uujava

It looks like RubyHash::marshalTo implementation has threading issues. It first saves hash size and then iterates over entries. When different thread modify the hash by adding entries this could result in broken dump. The test below could show what I mean.
We have two classes:
Test class holds a "hash" and an "obj" attributes.
Value class used for values stored in "hash" and in "obj" attribute. "obj" attribute in Test always holds the same Value.
When we start a thread that populate hash and begin marshal load/dump in main thread.
Eventually it will print "Wrong dump " if object attribute will not store Value object but some FixNum instance.

class ::Test
 attr_reader :hash, :obj 
 def initialize
   @hash={}
   @obj= ::Value.new(0,-2)
 end
end

class ::Value
  attr_reader :j
  def initialize i, j
   @i=i
   @j=j
  end
  
  def to_s
    "#{@i} #{@j}"
  end
end

test = ::Test.new
hash_size=200000
dump_count=100
class ::Test
 
 def initialize
   @hash={}
   @obj= ::Value.new(0,-2)
 end

 def obj
  @obj
 end

 def hash
   @hash
 end
 
 def marshal_dump
  [@hash, @obj]
 end

 def marshal_load arr
   @hash = arr[0]
   @obj = arr[1]
 end
end

class ::Value
  def initialize i, j
   @i=i
   @j=j
  end

  def j
    @j
  end  
  
  def to_s
    "#{@i} #{@j}"
  end
end

test = ::Test.new
hash_size=300000
dump_count=50
sample_count = dump_count/10

Thread.new do
   hash_size.times do |i|
    test.hash[i] = ::Value.new(i,1)
   end
end

stop = false

dump_count.times do |i|
  next if stop
  d = Marshal.dump test
  t= Marshal.load(d); 
  size = t.hash.size
  puts "sample: #{i} #{size} '#{test.hash[size-1]}'" if i.divmod(sample_count)[1]==0    
  puts "Wrong dump #{t.obj} #{t.obj.class}" unless t.obj.instance_of? ::Value
  stop = t.hash.size == hash_size
end

One could say that we should not share objects between threads, without proper locking, but still better behavior would be to behave like RubyArray::marshalTo throwing concurrent modification exception in almost the same cases. A patch (for 1.7.4 code base) could be as below:

Index: src/org/jruby/RubyHash.java
===================================================================
--- src/org/jruby/RubyHash.java (date 1381739421000)
+++ src/org/jruby/RubyHash.java (revision )
@@ -626,16 +626,25 @@
     }
 
     public void visitAll(Visitor visitor) {
+       visitLimited(visitor, -1);
+    }
+
+   private void visitLimited(Visitor visitor, long size) {
-        int startGeneration = generation;
+       int startGeneration = generation;
-        for (RubyHashEntry entry = head.nextAdded; entry != head; entry = entry.nextAdded) {
+       long count = size;
+       for (RubyHashEntry entry = head.nextAdded; entry != head && count != 0; entry = entry.nextAdded) {
+           count--;
-            if (startGeneration != generation) {
-                startGeneration = generation;
-                entry = head.nextAdded;
-                if (entry == head) break;
-            }
-            if (entry.isLive()) visitor.visit(entry.key, entry.value);
-        }
+           if (startGeneration != generation) {
+               startGeneration = generation;
+               entry = head.nextAdded;
+               if (entry == head) break;
+           }
+           if (entry.isLive()) visitor.visit(entry.key, entry.value);
+       }
+       // it does not handle all concurrent modification cases,
+       // but at least provdes correct marshal
+       if(count > 0) concurrentModification();
-    }
+   }
 
     /* ============================
      * End of hash internals
@@ -1949,9 +1958,10 @@
     // to totally change marshalling to work with overridden core classes.
     public static void marshalTo(final RubyHash hash, final MarshalStream output) throws IOException {
         output.registerLinkTarget(hash);
-        output.writeInt(hash.size);
+       int hashSize = hash.size;
+       output.writeInt(hashSize);
         try {
-            hash.visitAll(new Visitor() {
+            hash.visitLimited(new Visitor() {
                 public void visit(IRubyObject key, IRubyObject value) {
                     try {
                         output.dumpObject(key);
@@ -1960,7 +1970,7 @@
                         throw new VisitorIOException(e);
                     }
                 }
-            });
+            }, hashSize);
         } catch (VisitorIOException e) {
             throw (IOException)e.getCause();
         }

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions