module d_gc_usage; /* A little tutorial on the D GC. by leonardo maffi, V.1.2, Sep 21 2008. This code is relative to the D DMD compiler V.1.035 and it summarizes with a program implementation some experiments done with the garbage collector (gc) of the D language. So this code is commented to explain how to design a D program that interact closely with the GC and uses the GC API. As sample program I have chosen to implement a simple stack data structure, a class template defined on the type T, that's the type of the item the stack can contain. From my experiments the faster implementation in D for this data structure is just a contiguous GC-managed block of memory, that is slowly realloc-ed as it grows geometrically to contain the items. But In the following code I have used a more complex implementation for didactic purposes, to show more of the correct usage of the GC. The data structure used in this code is a singly linked list of memory blocks, that grow geometrically in size. This code doesn't contain some basic and quite useful methods (member functions), like a clear, a length, a way to reduce the length, etc, because I have tried to keep the whole code short. For a real, more complete and faster implementation of this data structure in D, look at ArrayBuilder into the d.builders module of my D libs: http://www.fantascienza.net/leonardo/so/libs_d.zip The code contains a printing line in the toarray() method that you can comment out, that shows the allocated chunk sizes. Note that the Stack throws OutOfMemoryException when it can't allocate more space. If the data structure is well designed, such exception can be catch, and the calling program can keep using the data structure without more allocations. This means that before throwing that exception the data structure has to take care to undo any change. At the moment this Stack isn't designed so well, so after a OutOfMemoryException it's probably in a faulty state. So this Stack code can be improved In such regards. Warning: programs that use the GC API explicitly are (for me) exceptionally difficult to get right. Even with max cure, looking at each line of code several times bugs can be present still. So such kind of code has to be used only in special situations. In all not critical situations it's much much better to leave everything to the automatic work of the GC (or to use fully manually managed memory from the C heap, keeping track of any reference it may contain). A general strategy to write such programs that use the GC API explicitly with a little probability of getting them right is to never delete GC-managed memory manually (with a realloc(ptr, 0), but to tell the GC or to make the GC remove some roots or references to the GC-managed memory, then later, when such memory is actually not referenced anymore (especially from INBOUND pointers/references) the GC will take care of deallocating it automatically and safely. */ import std.stdio: writefln; /* Here I have used a static import of the gc to avoid confusing malloc() and realloc() with the functions with the same name of std.c.stdlib. */ static import std.gc; // To be thrown when an malloc/realloc of the GC returns null. import std.outofmemory: OutOfMemoryException; // The following templates are mostly helpers of the IsReferenceType template. template ArrayType1(T: T[]) { alias T ArrayType1; } template IsType(T, Types...) { // Original idea by Burton Radons, modified static if (Types.length == 0) const bool IsType = false; else const bool IsType = is(T == Types[0]) || IsType!(T, Types[1 .. $]); } template BaseTypedef(T) { static if( is( T BaseType1 == typedef ) ) alias BaseTypedef!(BaseType1) BaseTypedef; else alias T BaseTypedef; } /* IsReferenceType is true when a given type(s) is/are a pointer or class, or it's a container that contains a reference type like a pointer of class. Note that it can't tell apart normal pointers from GC-managed pointers, for that you need TypeInfo.flags, but that works at run-tine only, where this template works at compile time, so it can be used into static ifs too. */ template IsReferenceType(Types...) { static if (Types.length == 0) { const bool IsReferenceType = false; } else static if (Types.length == 1) { // dsimcha suggests to replace the following line with // static if (IsType!(Mutable!(Types[0]), bool, byte, ... // to make this template work with const/immutable types on D 2.x. static if (IsType!(BaseTypedef!(Types[0]), bool, byte, ubyte, short, ushort, int, uint, long, ulong, float, double, real, ifloat, idouble, ireal, cfloat, cdouble, creal, char, dchar, wchar) ) { const bool IsReferenceType = false; } else static if ( is(BaseTypedef!(Types[0]) == class) || is(BaseTypedef!(Types[0]) == interface) ) { const bool IsReferenceType = true; } else static if ( is(BaseTypedef!(Types[0]) == struct) ) { const bool IsReferenceType = IsReferenceType!(FieldTypeTuple!(Types[0])); } else static if (IsStaticArray!(BaseTypedef!(Types[0]))) { const bool IsReferenceType = IsReferenceType!(ArrayType1!(BaseTypedef!(Types[0]))); } else const bool IsReferenceType = true; } else const bool IsReferenceType = IsReferenceType!(Types[0]) | IsReferenceType!(Types[1 .. $]); } // end IsReferenceType!() template IsDynamicArray(T) { // Adapted from the module tango.core.Traits, // (C) 2005-2006 Sean Kelly, BSD style licence const bool IsDynamicArray = is( typeof(T.init[0])[] == T ); } template IsArray(T) { const bool IsArray = is(typeof(T.length)) && is(typeof(T.sort)) && is(typeof(T.reverse)) && is(typeof(T.dup)); } template IsStaticArray(T) { const bool IsStaticArray = IsArray!(T) && (!IsDynamicArray!(T)); } int min(int a, int b) { return a <= b ? a : b; } // This Stack data structure works if T is an associative array too. class Stack(T) { // STARTING_SIZE is a starting point for the size of the stack, that is 4 items const int STARTING_SIZE = 4; /*FAST_GROW is the factor of geometrical grow used when the data structure holds few items. Later SLOW_GROW is used instead to avoid allocating too much memory and reduce the probability of memory overflows. */ const int FAST_GROW = 2; // BIG_MEM is used as a threshold to denote the transition from FAST_GROW to SLOW_GROW const int BIG_MEM = (1 << 27) / T.sizeof; // about 134_217_728 bytes const float SLOW_GROW = 1.3; // the type of the data block. struct Block { int len; // To know how long this block is. This isn't strictly necessary because the // grow rate is known, and the block position can be kept externally, so the // block size can be computed. But such field is used because it doesn't // use too much memory, and allows to simplify the code, avoiding the // re-computation of the block length each time. Block* next; // pointer to the next block or null. T* data; // Pointer to the chunk of memory that contains the data. // In C there's a common idiom of using an array[1] here // and then resizing the struct block itself to contain // an arbitrary sized (cut constant) block of memory. // But in D, when not in -release mode, array bounds are // controlled, so that idiom can't be used. } private Block* list_head; // Pointer to the oldest/smallest Block of the linked list. private Block* list_tail; // Pointer to the newest/largest Block of the linked list. private int total_len; // Total number of items appended to the Stack. private int used_last_block; // How many items are used in the last (tail) block. // Constructor. this() { // Starting point, just 4 items this.list_head = block_alloc(STARTING_SIZE); // Both tail and head are set at the same little initial block. this.list_tail = list_head; } // Destructor. ~this() { // As you can see we don't actually delete the memory with realloc(ptr, 0). // We just remove the roots to the pointers to the pointers to the allocated // memory chunk, then the GC when there are no more inbound pointers will // deallocate the memory safely. // We scan the linked list, removing all the ranges that refer to the pointers // that point to the allocated memory chunks. while (this.list_head != null) { // next_ptr makes a reference to list_head.next on the stack so that it isn't // GC'd immediately. Block* next_ptr = this.list_head.next; // At the moment this operation is O(n) in the Phobos GC. // Hopefully it will be fixed. // Note that removeRange() needs only the starting point of a range. // Regarding the presence of & see the explanations in the block_alloc() // method below. // removeRange() prevents GC from scanning list_head.next and // list_head.data for pointers. The next is still referenced from stack so // it lives yet. The data is not referenced from list_head anymore, and if // there are no other references to it, it is GC'd eventually. If there // are references though, it lives while those references are alive, which // is expected and correct. std.gc.removeRange(&this.list_head.data); // This removes the last reference to the former list_head so it gets GC'd // automatically later. this.list_head = next_ptr; } } // This method allocates the chunk of memory that contains the items. private T* items_alloc(int nitems) { // Try allocating GC memory. It's important to use GC memory because in general // T can be a GC-managed type, like a class. // Such memory is dirty, it may contain anything. auto p = cast(T*)std.gc.malloc(nitems * T.sizeof); if (p is null) throw new OutOfMemoryException(); /* This tells the compiler that the memory chunk pointed by p is an array of types T. For example if T is struct { SomeStruct* s, int i } then setTypeInfo() tells the GC to not look for pointer into the int 'i'. It T is an int the GC will not scan the memory block at all (in theory). (The GC documentation says 'as many as will fit', hopefully exactly nitems). Here the right thing to do is to use setTypeInfo(): std.gc.setTypeInfo(typeid(T), p); But at the moment setTypeInfo() is just: void setTypeInfo(TypeInfo ti, void* p) { if (ti.flags() & 1) hasNoPointers(p); else hasPointers(p); } So it's worse than useless (also because ti.flags() is sometimes buggy/wrong). */ static if (IsReferenceType!(T)) { // If T is a reference type, like a GC-managed pointer, then the GC must // scan it looking for pointers. The setTypeInfo() helps it where to look // for pointers, but it's important to actually clean the memory, so there // no risks of leaving dirty pointers, etc. // Unfortunately we can't just do p[0 .. nitems] = T.init; because in D V.1.035 // that doesn't work if T is a static array. So we have to manage the // two cases independently. static if (IsStaticArray!(T)) { // This can't be used, because of a DMD BUG present in V.1.035 still: // p[0 .. nitems][] = T.init; T Tinit; p[0 .. nitems][] = Tinit; } else p[0 .. nitems] = T.init; } else { // If T isn't a reference type, like an int, there's no need to clean the // memory given by malloc() because we always know how many items we have // inserted, so we can save the cleaning time. // We have also to tell the GC that this memory zone doesn't contain // pointers, so it doesn't slows down scanning it and avoids finding // spurious pointers. // This line can be removed once the setTypeInfo() becomes more precise and // better. hasNoPointers(this.data); } return p; } // This method creates a new block with space for nitems items. private Block* block_alloc(int nitems) { // Here a malloc too can be used, but only if you take care of initialize all the // fields correctly (note that pointers don't have to be initialized to 0, but // to null. // In general there can be CPUs where null isn't 0). // But the simpler way is just to use a 'new'. auto new_block = new Block; // Allocation of the space to contain nitems. new_block.data = cast(T*)items_alloc(nitems); /* What a root is: is a variable added to an array the GC holds inside. It's an absolute pointer, and becomes deleted only when the programmer deletes it explicitly with a GC command. A root is a starting point for the GC to look for memory to scan and collect. Note: +1 below is necessary because addRange() takes an interval open on the right. AddRange() takes two pointers, then why we use the & there? It's a trick: here we actually add a range of pointers to pointers, not just a range of pointers. This allows to keep the scanning correct even if new_block.data/new_block.data later change their contents. Because the pointers to those pointers don't change and lead to a correct scan. See at the bottom of this source code for an alternative solution. */ std.gc.addRange(&new_block.data, &new_block.data + 1); // Setting the item 'len' (the 'next' field was already set to null). new_block.len = nitems; return new_block; } /** Append an item to the Stack. Another similar method can be created to extend the Stack with a given array of items. */ final void opCatAssign(T x) { // If the last block is fully used then a new block must be created. if (this.used_last_block == this.list_tail.len) { // then last block is full and we must create a new one this.used_last_block = 0; Block* new_block; if (this.list_tail.len < BIG_MEM) // 2X growing factor for amortized O(1) growing time, and to reduce // RAM fragmentation too. new_block = block_alloc(cast(int)(this.list_tail.len * FAST_GROW)); else // Slower growing rate to save RAM. new_block = block_alloc(cast(int)(this.list_tail.len * SLOW_GROW)); // Append the newly created block to the the tail of the linked list. this.list_tail.next = new_block; // Move the tail pointer to point the new actual last block. this.list_tail = new_block; } // Append the given item x to the Stack. // Again we have to tell apart the case when T is a static array. static if (IsStaticArray!(T)) this.list_tail.data[this.used_last_block][] = x; else this.list_tail.data[this.used_last_block] = x; // The number of items used in the last block is grown by 1 this.used_last_block++; // The total number of items in the Stack is grown by 1 this.total_len++; } /** This converts the Stack into a normal array. This operation isn't common for stack data structures, but it's useful to use this Stack as an array builder too. */ T[] toarray() { // If the total len is 0, then we can return the empty array T[0]. if (!this.total_len) return null; // We allocate a memory chunk large enough to contain all items. auto result_ptr = cast(T*)items_alloc(this.total_len); // This operation is O(1), because in D array slices don't copy data. T[] result = result_ptr[0 .. this.total_len]; // Now we scan the list copying the memory into the result array. Block* aux_ptr = this.list_head; int pos; while (aux_ptr != null) { // Of the last block copy only up to the total_len. int ncopy = min(aux_ptr.len, this.total_len - pos); writefln(ncopy, " ", aux_ptr.len); // You can comment out this line. result[pos .. pos + ncopy] = aux_ptr.data[0 .. ncopy]; pos += ncopy; // Move to the next block of the linked list aux_ptr = aux_ptr.next; } // We delete/modify anything, so the toarray() can be called many times. return result; } } // End of Stack!() void main() { int x = 5; auto st = new Stack!(int*); // I use 70_000 because it's large enough to trigger automatic memory cleaning, // this helps spotting programming bugs. for (int i; i < 70_000; i++) st ~= &x; std.gc.fullCollect(); // To test even better. auto aux = st.toarray; } /* This an alternative solution, replace Block.next with: private Block* _next; Block* next() {return _next;} Block* next(Block* other) { removeRoot(_next); _next = other; addRoot(_next); return _next; } */