1717package com .cloud .hypervisor .kvm .storage ;
1818
1919import java .io .File ;
20+ import java .io .FileInputStream ;
21+ import java .io .BufferedInputStream ;
22+ import java .io .IOException ;
2023import java .net .URI ;
2124import java .net .URISyntaxException ;
2225import java .util .ArrayList ;
3740import org .libvirt .StoragePoolInfo ;
3841import org .libvirt .StorageVol ;
3942import org .libvirt .StoragePoolInfo .StoragePoolState ;
43+ import com .ceph .rados .Rados ;
44+ import com .ceph .rados .RadosException ;
45+ import com .ceph .rados .IoCTX ;
46+ import com .ceph .rbd .Rbd ;
47+ import com .ceph .rbd .RbdImage ;
48+ import com .ceph .rbd .RbdException ;
4049
4150import com .cloud .agent .api .ManageSnapshotCommand ;
4251import com .cloud .hypervisor .kvm .resource .LibvirtConnection ;
@@ -63,6 +72,8 @@ public class LibvirtStorageAdaptor implements StorageAdaptor {
6372 private String _mountPoint = "/mnt" ;
6473 private String _manageSnapshotPath ;
6574
75+ private String rbdTemplateSnapName = "cloudstack-base-snap" ; // Name of the RBD snapshot that copyPhysicalDisk creates and protects on images written to an RBD pool, and that createDiskFromTemplate clones from for RBD format 2 templates
76+
6677 public LibvirtStorageAdaptor (StorageLayer storage ) {
6778 _storageLayer = storage ;
6879 _manageSnapshotPath = Script .findScript ("scripts/storage/qcow2/" ,
@@ -638,6 +649,15 @@ public boolean deletePhysicalDisk(String uuid, KVMStoragePool pool) {
638649 }
639650 }
640651
652+ /**
653+ * Copies a physical disk (a template) from Secondary Storage to Primary Storage,
654+ * or from Primary Storage to Primary Storage.
655+ *
656+ * The first time a template is deployed on Primary Storage it is copied from
657+ * Secondary Storage to Primary Storage.
658+ *
659+ * If the template already exists on Primary Storage, it is copied within Primary Storage.
660+ */
641661 @ Override
642662 public KVMPhysicalDisk createDiskFromTemplate (KVMPhysicalDisk template ,
643663 String name , PhysicalDiskFormat format , long size , KVMStoragePool destPool ) {
@@ -690,21 +710,118 @@ public KVMPhysicalDisk createDiskFromTemplate(KVMPhysicalDisk template,
690710
691711 if (srcPool .getType () != StoragePoolType .RBD ) {
692712 srcFile = new QemuImgFile (template .getPath (), template .getFormat ());
713+ qemu .convert (srcFile , destFile );
693714 } else {
694- template .setFormat (PhysicalDiskFormat .RAW );
695- srcFile = new QemuImgFile (KVMPhysicalDisk .RBDStringBuilder (srcPool .getSourceHost (),
696- srcPool .getSourcePort (),
697- srcPool .getAuthUserName (),
698- srcPool .getAuthSecret (),
699- template .getPath ()));
700- srcFile .setFormat (template .getFormat ());
715+
716+ /**
717+ * We have to find out if the source file is in the same RBD pool and has
718+ * RBD format 2 before we can do a layering/clone operation on the RBD image
719+ *
720+ * This will be the case when the template is already on Primary Storage and
721+ * we want to copy it
722+ */
723+
724+ /* Feature 1<<0 means layering in RBD format 2 */
725+ int rbdFeatures = (1 <<0 );
726+ /* Order 0 means 4MB blocks (the default) */
727+ int rbdOrder = 0 ;
728+
729+ try {
730+ if ((srcPool .getSourceHost ().equals (destPool .getSourceHost ())) && (srcPool .getSourceDir ().equals (destPool .getSourceDir ()))) {
731+ /* We are on the same Ceph cluster, but we require RBD format 2 on the source image */
732+ s_logger .debug ("Trying to perform a RBD clone (layering) since we are operating in the same storage pool" );
733+
734+ Rados r = new Rados (srcPool .getAuthUserName ());
735+ r .confSet ("mon_host" , srcPool .getSourceHost () + ":" + srcPool .getSourcePort ());
736+ r .confSet ("key" , srcPool .getAuthSecret ());
737+ r .connect ();
738+ s_logger .debug ("Succesfully connected to Ceph cluster at " + r .confGet ("mon_host" ));
739+
740+ IoCTX io = r .ioCtxCreate (srcPool .getSourceDir ());
741+ Rbd rbd = new Rbd (io );
742+ RbdImage srcImage = rbd .open (template .getName ());
743+
744+ if (srcImage .isOldFormat ()) {
745+ /* The source image is RBD format 1, we have to do a regular copy */
746+ s_logger .debug ("The source image " + srcPool .getSourceDir () + "/" + template .getName ()
747+ + " is RBD format 1. We have to perform a regular copy (" + template .getVirtualSize () + " bytes)" );
748+
749+ rbd .create (disk .getName (), template .getVirtualSize (), rbdFeatures , rbdOrder );
750+ RbdImage destImage = rbd .open (disk .getName ());
751+
752+ s_logger .debug ("Starting to copy " + srcImage .getName () + " to " + destImage .getName () + " in Ceph pool " + srcPool .getSourceDir ());
753+ rbd .copy (srcImage , destImage );
754+
755+ s_logger .debug ("Finished copying " + srcImage .getName () + " to " + destImage .getName () + " in Ceph pool " + srcPool .getSourceDir ());
756+ rbd .close (destImage );
757+ } else {
758+ s_logger .debug ("The source image " + srcPool .getSourceDir () + "/" + template .getName ()
759+ + " is RBD format 2. We will perform a RBD clone using snapshot "
760+ + this .rbdTemplateSnapName );
761+ /* The source image is format 2, we can do a RBD snapshot+clone (layering) */
762+ rbd .clone (template .getName (), this .rbdTemplateSnapName , io , disk .getName (), rbdFeatures , rbdOrder );
763+ s_logger .debug ("Succesfully cloned " + template .getName () + "@" + this .rbdTemplateSnapName + " to " + disk .getName ());
764+ }
765+
766+ rbd .close (srcImage );
767+ r .ioCtxDestroy (io );
768+ } else {
769+ /* The source pool or host is not the same Ceph cluster, so we do a full copy between the two clusters through the RBD bindings (Rbd.copy) */
770+ s_logger .debug ("Both the source and destination are RBD, but not the same Ceph cluster. Performing a copy" );
771+
772+ Rados rSrc = new Rados (srcPool .getAuthUserName ());
773+ rSrc .confSet ("mon_host" , srcPool .getSourceHost () + ":" + srcPool .getSourcePort ());
774+ rSrc .confSet ("key" , srcPool .getAuthSecret ());
775+ rSrc .connect ();
776+ s_logger .debug ("Succesfully connected to source Ceph cluster at " + rSrc .confGet ("mon_host" ));
777+
778+ Rados rDest = new Rados (destPool .getAuthUserName ());
779+ rDest .confSet ("mon_host" , destPool .getSourceHost () + ":" + destPool .getSourcePort ());
780+ rDest .confSet ("key" , destPool .getAuthSecret ());
781+ rDest .connect ();
782+ s_logger .debug ("Succesfully connected to source Ceph cluster at " + rDest .confGet ("mon_host" ));
783+
784+ IoCTX sIO = rSrc .ioCtxCreate (srcPool .getSourceDir ());
785+ Rbd sRbd = new Rbd (sIO );
786+
787+ IoCTX dIO = rDest .ioCtxCreate (destPool .getSourceDir ());
788+ Rbd dRbd = new Rbd (dIO );
789+
790+ s_logger .debug ("Creating " + disk .getName () + " on the destination cluster " + rDest .confGet ("mon_host" )
791+ + " in pool " + destPool .getSourceDir ());
792+ dRbd .create (disk .getName (), template .getVirtualSize (), rbdFeatures , rbdOrder );
793+
794+ RbdImage srcImage = sRbd .open (template .getName ());
795+ RbdImage destImage = dRbd .open (disk .getName ());
796+
797+ s_logger .debug ("Copying " + template .getName () + " from Ceph cluster " + rSrc .confGet ("mon_host" ) + " to " + disk .getName ()
798+ + " on cluster " + rDest .confGet ("mon_host" ));
799+ sRbd .copy (srcImage , destImage );
800+
801+ sRbd .close (srcImage );
802+ dRbd .close (destImage );
803+
804+ rSrc .ioCtxDestroy (sIO );
805+ rDest .ioCtxDestroy (dIO );
806+ }
807+ } catch (RadosException e ) {
808+ s_logger .error ("Failed to perform a RADOS action on the Ceph cluster, the error was: " + e .getMessage ());
809+ disk = null ;
810+ } catch (RbdException e ) {
811+ s_logger .error ("Failed to perform a RBD action on the Ceph cluster, the error was: " + e .getMessage ());
812+ disk = null ;
813+ }
701814 }
702- qemu .convert (srcFile , destFile );
703815 }
704816 } catch (QemuImgException e ) {
705817 s_logger .error ("Failed to create " + disk .getPath () +
706818 " due to a failed executing of qemu-img: " + e .getMessage ());
707819 }
820+
821+ if (disk == null ) {
822+ throw new CloudRuntimeException ("Failed to create " + disk .getPath () + " from template " + template .getName ());
823+ }
824+
708825 return disk ;
709826 }
710827
@@ -733,17 +850,26 @@ public List<KVMPhysicalDisk> listPhysicalDisks(String storagePoolUuid,
733850 }
734851 }
735852
853+ /**
854+ * This copies a volume from Primary Storage to Secondary Storage
855+ *
856+ * In theory it could also do it the other way around, but the current implementation
857+ * in ManagementServerImpl shows that the destPool is always a Secondary Storage Pool
858+ */
736859 @ Override
737860 public KVMPhysicalDisk copyPhysicalDisk (KVMPhysicalDisk disk , String name ,
738861 KVMStoragePool destPool ) {
739862
740- /*
863+ /**
741864 With RBD you can't run qemu-img convert with an existing RBD image as destination
742865 qemu-img will exit with the error that the destination already exists.
743866 So for RBD we don't create the image, but let qemu-img do that for us.
744867
745868 We then create a KVMPhysicalDisk object that we can return
746- */
869+
870+ It is however very unlikely that the destPool will be RBD, since it isn't supported
871+ for Secondary Storage
872+ */
747873
748874 KVMPhysicalDisk newDisk ;
749875 if (destPool .getType () != StoragePoolType .RBD ) {
@@ -791,15 +917,97 @@ public KVMPhysicalDisk copyPhysicalDisk(KVMPhysicalDisk disk, String name,
791917 + srcFile .getFileName () + " the error was: " + e .getMessage ());
792918 }
793919 }
920+
921+ try {
922+ qemu .convert (srcFile , destFile );
923+ } catch (QemuImgException e ) {
924+ s_logger .error ("Failed to convert " + srcFile .getFileName () + " to "
925+ + destFile .getFileName () + " the error was: " + e .getMessage ());
926+ }
927+
794928 } else if ((srcPool .getType () != StoragePoolType .RBD ) && (destPool .getType () == StoragePoolType .RBD )) {
795- srcFile = new QemuImgFile (sourcePath , sourceFormat );
796- destFile = new QemuImgFile (KVMPhysicalDisk .RBDStringBuilder (destPool .getSourceHost (),
797- destPool .getSourcePort (),
798- destPool .getAuthUserName (),
799- destPool .getAuthSecret (),
800- destPath ));
801- destFile .setFormat (destFormat );
929+ /**
930+ * Qemu doesn't support writing to RBD format 2 directly, so we have to write to a temporary RAW file first
931+ * which we then convert to RBD format 2.
932+ *
933+ * A HUGE performance gain can be achieved here if QCOW2 -> RBD format 2 can be done in one step
934+ */
935+ s_logger .debug ("The source image is not RBD, but the destination is. We will convert into RBD format 2" );
936+ String tmpFile = "/tmp/" + name ;
937+ int rbdFeatures = (1 <<0 );
938+ int rbdOrder = 0 ;
939+
940+ try {
941+ srcFile = new QemuImgFile (sourcePath , sourceFormat );
942+ destFile = new QemuImgFile (tmpFile );
943+ s_logger .debug ("Converting " + srcFile .getFileName () + " to " + tmpFile + " as a temporary file for RBD conversion" );
944+ qemu .convert (srcFile , destFile );
945+
946+ // We now convert the temporary file to a RBD image with format 2
947+ Rados r = new Rados (destPool .getAuthUserName ());
948+ r .confSet ("mon_host" , destPool .getSourceHost () + ":" + destPool .getSourcePort ());
949+ r .confSet ("key" , destPool .getAuthSecret ());
950+ r .connect ();
951+ s_logger .debug ("Succesfully connected to Ceph cluster at " + r .confGet ("mon_host" ));
952+
953+ IoCTX io = r .ioCtxCreate (destPool .getSourceDir ());
954+ Rbd rbd = new Rbd (io );
955+
956+ s_logger .debug ("Creating RBD image " + name + " in Ceph pool " + destPool .getSourceDir () + " with RBD format 2" );
957+ rbd .create (name , disk .getVirtualSize (), rbdFeatures , rbdOrder );
958+
959+ RbdImage image = rbd .open (name );
960+
961+ // We now read the temporary file and write it to the RBD image
962+ File fh = new File (tmpFile );
963+ BufferedInputStream bis = new BufferedInputStream (new FileInputStream (fh ));
964+
965+ int chunkSize = 4194304 ;
966+ long offset = 0 ;
967+ s_logger .debug ("Reading temporary file " + tmpFile + " (" + fh .length () + " bytes) into RBD image " + name + " in chunks of " + chunkSize + " bytes" );
968+ while (true ) {
969+ byte [] buf = new byte [chunkSize ];
970+
971+ int bytes = bis .read (buf );
972+ if (bytes <= 0 ) {
973+ break ;
974+ }
975+ image .write (buf , offset , bytes );
976+ offset += bytes ;
977+ }
978+ s_logger .debug ("Completed writing " + tmpFile + " to RBD image " + name + ". Bytes written: " + offset );
979+ bis .close ();
980+ s_logger .debug ("Removing temporary file " + tmpFile );
981+ fh .delete ();
982+
983+ /* Snapshot the image and protect that snapshot so we can clone (layer) from it */
984+ s_logger .debug ("Creating RBD snapshot " + this .rbdTemplateSnapName + " on image " + name );
985+ image .snapCreate (this .rbdTemplateSnapName );
986+ s_logger .debug ("Protecting RBD snapshot " + this .rbdTemplateSnapName + " on image " + name );
987+ image .snapProtect (this .rbdTemplateSnapName );
988+
989+ rbd .close (image );
990+ r .ioCtxDestroy (io );
991+ } catch (QemuImgException e ) {
992+ s_logger .error ("Failed to do a temp convert from " + srcFile .getFileName () + " to "
993+ + destFile .getFileName () + " the error was: " + e .getMessage ());
994+ newDisk = null ;
995+ } catch (RadosException e ) {
996+ s_logger .error ("A Ceph RADOS operation failed (" + e .getReturnValue () + "). The error was: " + e .getMessage ());
997+ newDisk = null ;
998+ } catch (RbdException e ) {
999+ s_logger .error ("A Ceph RBD operation failed (" + e .getReturnValue () + "). The error was: " + e .getMessage ());
1000+ newDisk = null ;
1001+ } catch (IOException e ) {
1002+ s_logger .error ("Failed reading the temporary file during the conversion to RBD: " + e .getMessage ());
1003+ newDisk = null ;
1004+ }
1005+
8021006 } else {
1007+ /**
1008+ We let Qemu-Img do the work here. Although we could work with librbd and have that do the cloning
1009+ it doesn't benefit us. It's better to keep the current code in place which works
1010+ */
8031011 srcFile = new QemuImgFile (KVMPhysicalDisk .RBDStringBuilder (srcPool .getSourceHost (),
8041012 srcPool .getSourcePort (),
8051013 srcPool .getAuthUserName (),
@@ -812,17 +1020,19 @@ public KVMPhysicalDisk copyPhysicalDisk(KVMPhysicalDisk disk, String name,
8121020 destPool .getAuthSecret (),
8131021 destPath ));
8141022 destFile .setFormat (destFormat );
815- }
8161023
817- if (srcFile != null && destFile != null ) {
8181024 try {
8191025 qemu .convert (srcFile , destFile );
8201026 } catch (QemuImgException e ) {
8211027 s_logger .error ("Failed to convert " + srcFile .getFileName () + " to "
8221028 + destFile .getFileName () + " the error was: " + e .getMessage ());
1029+ newDisk = null ;
8231030 }
8241031 }
8251032
1033+ if (newDisk == null ) {
1034+ throw new CloudRuntimeException ("Failed to copy " + disk .getPath () + " to " + name );
1035+ }
8261036
8271037 return newDisk ;
8281038 }
0 commit comments