/*
* Copyright (C) 2005-2013 Team XBMC
* http://xbmc.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with XBMC; see the file COPYING. If not, see
* <http://www.gnu.org/licenses/>.
*
*/
#ifdef __ppc__
#pragma GCC optimization_level 0
#endif
#include "LockFree.h"
#include <stdlib.h> // malloc()/free()
///////////////////////////////////////////////////////////////////////////
// Fast stack implementation
// NOTE: non-locking only on systems that support atomic cas2 operations
///////////////////////////////////////////////////////////////////////////
void lf_stack_init(lf_stack* pStack)
{
pStack->top.ptr = NULL;
pStack->count = 0;
}
void lf_stack_push(lf_stack* pStack, lf_node* pNode)
{
atomic_ptr top, newTop;
do
{
top = pStack->top;
pNode->next.ptr = top.ptr; // Link in the new node
newTop.ptr = pNode;
#if defined(__ppc__) || defined(__powerpc__) || defined(__arm__)
} while(cas((long*)&pStack->top, atomic_ptr_to_long(top), atomic_ptr_to_long(newTop)) != atomic_ptr_to_long(top));
#else
newTop.version = top.version + 1;
} while(cas2((long long*)&pStack->top, atomic_ptr_to_long_long(top), atomic_ptr_to_long_long(newTop)) != atomic_ptr_to_long_long(top));
#endif
AtomicIncrement(&pStack->count);
}
lf_node* lf_stack_pop(lf_stack* pStack)
{
atomic_ptr top, newTop;
do
{
top = pStack->top;
if (top.ptr == NULL)
return NULL;
newTop.ptr = ((lf_node*)top.ptr)->next.ptr; // Unlink the current top node
#if defined(__ppc__) || defined(__powerpc__) || defined(__arm__)
} while(cas((long*)&pStack->top, atomic_ptr_to_long(top), atomic_ptr_to_long(newTop)) != atomic_ptr_to_long(top));
#else
newTop.version = top.version + 1;
} while(cas2((long long*)&pStack->top, atomic_ptr_to_long_long(top), atomic_ptr_to_long_long(newTop)) != atomic_ptr_to_long_long(top));
#endif
AtomicDecrement(&pStack->count);
return (lf_node*)top.ptr;
}
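
// Illustrative usage sketch (assumption: this block is not part of the original file and the
// work_item struct is hypothetical). The stack stores caller-provided nodes, so any structure
// that begins with an lf_node can be pushed and popped. Kept under #if 0 so it is never
// compiled into the library.
#if 0
struct work_item
{
  lf_node node; // Must be the first member so a work_item* can double as an lf_node*
  int payload;
};

static void stack_usage_sketch()
{
  lf_stack stack;
  lf_stack_init(&stack);

  work_item item;
  item.payload = 42;
  lf_stack_push(&stack, (lf_node*)&item); // Push the caller-owned node

  work_item* popped = (work_item*)lf_stack_pop(&stack); // Returns NULL once the stack is empty
  (void)popped;
}
#endif
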
///////////////////////////////////////////////////////////////////////////
// Fast heap implementation
// NOTE: non-locking only on systems that support atomic cas2 operations
///////////////////////////////////////////////////////////////////////////
// TODO: Implement auto-shrink based on chunk reference counts
// TODO: Read the page size from the OS or allow caller to specify
// Maybe have a minimum number of blocks...
#define MIN_ALLOC 4096
void lf_heap_init(lf_heap* pHeap, size_t blockSize, size_t initialSize /*= 0*/)
{
pHeap->alloc_lock = 0; // Initialize the allocation lock
pHeap->top_chunk = NULL;
lf_stack_init(&pHeap->free_list); // Initialize the free-list stack
// Perform a few sanity checks on the parameters
if (blockSize < sizeof(lf_node)) // Make sure we have blocks big enough to store in the free-list
blockSize = sizeof(lf_node);
pHeap->block_size = blockSize;
if (initialSize < 10 * blockSize)
initialSize = 10 * blockSize; // TODO: This should be more intelligent
lf_heap_grow(pHeap, initialSize); // Allocate the first chunk
}
void lf_heap_grow(lf_heap* pHeap, size_t size /*= 0*/)
{
long blockSize = pHeap->block_size; // This has already been checked for sanity
if (!size || size < MIN_ALLOC - sizeof(lf_heap_chunk)) // Allocate at least one page from the OS (TODO: Try valloc)
size = MIN_ALLOC - sizeof(lf_heap_chunk);
unsigned int blockCount = size / blockSize;
if (size % blockSize) // make sure we have complete blocks
size = blockSize * ++blockCount;
// Allocate the first chunk from the general heap and link it into the chunk list
long mallocSize = size + sizeof(lf_heap_chunk);
lf_heap_chunk* pChunk = (lf_heap_chunk*) malloc(mallocSize);
pChunk->size = mallocSize;
SPINLOCK_ACQUIRE(pHeap->alloc_lock); // Lock the chunk list. Contention here is VERY unlikely, so use the simplest possible sync mechanism.
pChunk->next = pHeap->top_chunk;
pHeap->top_chunk = pChunk; // Link it into the list
SPINLOCK_RELEASE(pHeap->alloc_lock); // The list is now consistent
// Add all blocks to the free-list
unsigned char* pBlock = (unsigned char*)pChunk + sizeof(lf_heap_chunk);
for ( unsigned int block = 0; block < blockCount; block++)
{
lf_stack_push(&pHeap->free_list, (lf_node*)pBlock);
pBlock += blockSize;
}
}
void lf_heap_deinit(lf_heap* pHeap)
{
// Free all allocated chunks
lf_heap_chunk* pNext;
for(lf_heap_chunk* pChunk = pHeap->top_chunk; pChunk; pChunk = pNext)
{
pNext = pChunk->next;
free(pChunk);
}
}
void* lf_heap_alloc(lf_heap* pHeap)
{
void * p = lf_stack_pop(&pHeap->free_list);
if (!p)
{
lf_heap_grow(pHeap, 0);
// TODO: should we just call it recursively?
return lf_stack_pop(&pHeap->free_list); // If growing didn't help, something is wrong (or someone else took them all REALLY fast)
}
return p;
}
void lf_heap_free(lf_heap* pHeap, void* p)
{
if (!p) // Allow for NULL to pass safely
return;
lf_stack_push(&pHeap->free_list, (lf_node*)p); // Return the block to the free list
}
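
// Illustrative usage sketch (assumption: this block is not part of the original file; the
// 64-byte block size is arbitrary). The heap hands out fixed-size blocks from a lock-free
// free-list, growing by whole chunks when it runs dry. Kept under #if 0 so it is never
// compiled into the library.
#if 0
static void heap_usage_sketch()
{
  lf_heap heap;
  lf_heap_init(&heap, 64, 0);         // 64-byte blocks, default initial size

  void* block = lf_heap_alloc(&heap); // Pops a block from the free-list, growing if needed
  lf_heap_free(&heap, block);         // Returns the block to the free-list

  lf_heap_deinit(&heap);              // Releases every chunk obtained from malloc
}
#endif
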
///////////////////////////////////////////////////////////////////////////
// Lock-free queue
///////////////////////////////////////////////////////////////////////////
void lf_queue_init(lf_queue* pQueue)
{
pQueue->len = 0;
lf_heap_init(&pQueue->node_heap, sizeof(lf_queue_node)); // Initialize the node heap
lf_queue_node* pNode = lf_queue_new_node(pQueue); // Create the 'empty' node
pNode->next.ptr = NULL;
pNode->value = (void*)0xdeadf00d;
pQueue->head.ptr = pQueue->tail.ptr = pNode;
}
void lf_queue_deinit(lf_queue* pQueue)
{
lf_heap_deinit(&pQueue->node_heap); // Clean up the node heap
}
// TODO: template-ize
void lf_queue_enqueue(lf_queue* pQueue, void* value)
{
lf_queue_node* pNode = lf_queue_new_node(pQueue); // Get a container
pNode->value = value;
pNode->next.ptr = NULL;
atomic_ptr tail, next, node;
do
{
tail = pQueue->tail;
next = ((lf_queue_node*)tail.ptr)->next;
#if defined(__ppc__) || defined(__powerpc__) || defined(__arm__)
if (atomic_ptr_to_long(tail) == atomic_ptr_to_long(pQueue->tail)) // Check consistency
#else
if (atomic_ptr_to_long_long(tail) == atomic_ptr_to_long_long(pQueue->tail)) // Check consistency
#endif
{
if (next.ptr == NULL) // Was tail pointing to the last node?
{
node.ptr = pNode;
#if defined(__ppc__) || defined(__powerpc__) || defined(__arm__)
if (cas((long*)&((lf_queue_node*)tail.ptr)->next, atomic_ptr_to_long(next), atomic_ptr_to_long(node)) == atomic_ptr_to_long(next)) // Try to link node at end
#else
node.version = next.version + 1;
if (cas2((long long*)&((lf_queue_node*)tail.ptr)->next, atomic_ptr_to_long_long(next), atomic_ptr_to_long_long(node)) == atomic_ptr_to_long_long(next)) // Try to link node at end
#endif
break; // enqueue is done.
}
else // tail was lagging, try to help...
{
node.ptr = next.ptr;
#if defined(__ppc__) || defined(__powerpc__) || defined(__arm__)
cas((long*)&pQueue->tail, atomic_ptr_to_long(tail), atomic_ptr_to_long(node)); // We don't care if we are successful or not
#else
node.version = tail.version + 1;
cas2((long long*)&pQueue->tail, atomic_ptr_to_long_long(tail), atomic_ptr_to_long_long(node)); // We don't care if we are successful or not
#endif
}
}
} while (true); // Keep trying until the enqueue is done
node.ptr = pNode;
#if defined(__ppc__) || defined(__powerpc__) || defined(__arm__)
cas((long*)&pQueue->tail, atomic_ptr_to_long(tail), atomic_ptr_to_long(node)); // Try to swing the tail to the new node
#else
node.version = tail.version + 1;
cas2((long long*)&pQueue->tail, atomic_ptr_to_long_long(tail), atomic_ptr_to_long_long(node)); // Try to swing the tail to the new node
#endif
AtomicIncrement(&pQueue->len);
}
// TODO: template-ize
void* lf_queue_dequeue(lf_queue* pQueue)
{
atomic_ptr head, tail, next, node;
void* pVal = NULL;
do
{
head = pQueue->head;
tail = pQueue->tail;
next = ((lf_queue_node*)head.ptr)->next;
#if defined(__ppc__) || defined(__powerpc__) || defined(__arm__)
if (atomic_ptr_to_long(head) == atomic_ptr_to_long(pQueue->head)) // Check consistency
#else
if (atomic_ptr_to_long_long(head) == atomic_ptr_to_long_long(pQueue->head)) // Check consistency
#endif
{
if (head.ptr == tail.ptr) // Queue is empty or tail is lagging
{
if (next.ptr == NULL) // Queue is empty
return NULL;
node.ptr = next.ptr;
#if defined(__ppc__) || defined(__powerpc__) || defined(__arm__)
cas((long*)&pQueue->tail, atomic_ptr_to_long(tail), atomic_ptr_to_long(node)); // Tail is lagging. Try to advance it.
#else
node.version = tail.version + 1;
cas2((long long*)&pQueue->tail, atomic_ptr_to_long_long(tail), atomic_ptr_to_long_long(node)); // Tail is lagging. Try to advance it.
#endif
}
else // Tail is consistent. No need to deal with it.
{
pVal = ((lf_queue_node*)next.ptr)->value;
node.ptr = next.ptr;
#if defined(__ppc__) || defined(__powerpc__) || defined(__arm__)
if (cas((long*)&pQueue->head, atomic_ptr_to_long(head), atomic_ptr_to_long(node)) == atomic_ptr_to_long(head))
#else
node.version = head.version + 1;
if (cas2((long long*)&pQueue->head, atomic_ptr_to_long_long(head), atomic_ptr_to_long_long(node)) == atomic_ptr_to_long_long(head))
#endif
break; // Dequeue is done
}
}
} while (true); // Keep trying until the dequeue is done or the queue empties
lf_queue_free_node(pQueue, head.ptr);
AtomicDecrement(&pQueue->len);
return pVal;
}
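
// Illustrative usage sketch (assumption: this block is not part of the original file). Values
// are carried as void* in heap-backed nodes; lf_queue_dequeue returns NULL when the queue is
// empty, so NULL itself should not be enqueued as a payload. Kept under #if 0 so it is never
// compiled into the library.
#if 0
static void queue_usage_sketch()
{
  lf_queue queue;
  lf_queue_init(&queue);

  int value = 7;
  lf_queue_enqueue(&queue, &value);          // Wraps the payload in a freshly allocated node
  int* out = (int*)lf_queue_dequeue(&queue); // Unlinks the head node and returns the payload
  (void)out;

  lf_queue_deinit(&queue);
}
#endif
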
#ifdef __ppc__
#pragma GCC optimization_level reset
#endif