// Copyright David Abrahams 2009. Distributed under the Boost
// Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// #define _GLIBCXX_DEBUG

#include <utility>
#include <memory>
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <cassert>

// Test for different implementations of move assignment.
//
// Configuration macros:
//
// ARRAY      - if defined, test array<int,100>.  Otherwise test vector<int>
//
// Testing array
// -------------
// 
// SWAP_TMP           - move assignment implemented as "move to temporary+swap", 
//                      i.e. T(move(rhs)).swap(*this)
// 
// SIMPLE_MINDED_MOVE - move assignment uses straightforward minimal implementation
//
//                    - otherwise, move assignment uses highly optimized minimal implementation
//
// Testing vector
// --------------
//
// SWAP_TMP   - see above
// 
// CLEAR_SWAP - move assignment implemented as clear+swap, 
//              i.e. this->clear(); swap(rhs);
//
//            - otherwise, attempt is made to implement absolutely
//              minimal move assignment

// How I tested vector:
//
// for x in CLEAR_SWAP SWAP_TMP MINIMAL; do
//   echo && echo "mode:" $x
//   g++-4.4 -DNDEBUG -D$x -std=c++0x -O3 canonical-assign.cpp -o test && time ./test 1000 2000
// done
//

// How I tested array:
//
// for x in SWAP_TMP SIMPLE_MINDED_MOVE MINIMAL; do
//   echo && echo "mode:" $x
//   g++-4.4 -DNDEBUG -DARRAY -D$x -std=c++0x -O3 canonical-assign.cpp -o test && time ./test 500
// done
//

// Note: these tests could be done more carefully (e.g. warming up the
// cache; throwing out test runs; timing only the relevant part of the
// execution).  I did some simple checks to see if that would make any
// difference and concluded that it didn't, at least on my system.

template <class T>
struct alloc
{
    T* allocate(std::size_t n) const
    {
        return static_cast<T*>(operator new(n * sizeof(T)));
    }
    void deallocate(T* p, std::size_t n) const
    {
        return operator delete(p);
    }
};

template <class T, class A = alloc<T> >
struct vbase : private A
{
    vbase()
        : begin_(0), end_(0), cap_(0)
    {}
    
    vbase(std::size_t n)
        : begin_(this->allocate(n))
        , end_(begin_)
        , cap_(begin_ + n)
    {}

    vbase(vbase const& rhs)
        : A(static_cast<A const&>(rhs))
        , begin_(this->allocate(rhs.size()))
        , end_(begin_)
        , cap_(begin_ + rhs.size())
    {
    }

    vbase(vbase&& rhs)
        : A( std::move(static_cast<A const&>(rhs)) )
        , begin_( rhs.begin_ )
        , end_( rhs.end_ )
        , cap_( rhs.cap_ )
    {
        rhs.begin_ = rhs.end_ = rhs.cap_ = 0;
    }

    vbase& operator=(vbase&& rhs)
    {
        for (T* e = end_, *b = begin_; e != b;)
            (--e)->~T();

        std::swap(this->begin_, rhs.begin_);
        std::swap(this->cap_, rhs.cap_);
        this->end_ = rhs.end_;
        rhs.end_ = rhs.begin_;
        return *this;
    }
    
    vbase& operator=(vbase const& rhs)
    {
        {
            A &x = *this;
            x = rhs;
        }
        
        if (capacity() < rhs.size())
        {
            this->clear();
            this->reallocate_(rhs.size());
        }

        T* src = rhs.begin_;
        T* const e = rhs.end_;
        T* dst = this->begin_;
            
        while (src != e && dst != this->end_)
            (*dst++) = (*src++);
            
        while (src != e)
            this->push_back(*src++);
    }

    friend inline void swap(vbase& v1, vbase& v2)
    {
        v1.swap(v2);
    }

    void swap(vbase& rhs)
    {
        std::swap(this->begin_, rhs.begin_);
        std::swap(this->end_, rhs.end_);
        std::swap(this->cap_, rhs.cap_);
    }
    
    friend inline bool operator<(vbase const& v1, vbase const& v2)
    {
        return std::lexicographical_compare(v1.begin(), v1.end(), v2.begin(), v2.end());
    }
    
    std::size_t size() const { return end_ - begin_; }
    std::size_t capacity() const { return cap_ - begin_; }
    
    void push_back(T x)
    {
        if (end_ == cap_)
            reallocate_(size() ? size() * 2 : 1);
        new (end_) T(std::move(x));
        ++end_;
    }

    void pop_back()
    {
        (--end_)->~T();
    }
    
    ~vbase()
    {
        for (T* p = begin_, *e = end_; p != e ; ++p)
            p->~T();
        if (begin_)
            this->deallocate(begin_, cap_ - begin_);
    }

    void clear()
    {
        for (T* b = begin_, *e = end_; e != b;)
            (--e)->~T();
        end_ = begin_;
    }

    void reserve(std::size_t n)
    {
        if (capacity() < n)
            reallocate_(n);
    }

    void resize(std::size_t n)
    {
        if (size() > n)
        {
            do { pop_back(); }
            while(size() > n);
        }
        else if (size() < n)
        {
            reserve(n);
            do { push_back(T()); }
            while(size() < n);
        }
    }
    
    T const* begin() const { return begin_; }
    T const* end() const { return end_; }
    
    T* begin() { return begin_; }
    T* end() { return end_; }
    
    T& operator[](std::size_t x) { return begin_[x]; }
    T const& operator[](std::size_t x) const { return begin_[x]; }
    
 protected:
    void reallocate_(std::size_t n)
    {
        vbase x;
        x.begin_ = x.end_ = this->allocate(n);
        x.cap_ = x.begin_ + n;
        
        for (T* p = begin_, *e = end_, *q = x.begin_; p != e; ++p, ++q)
            *q = std::move(*p);
        
        x.end_ += size();
        this->swap(x);
    }
    
    T* begin_;
    T* end_;
    T* cap_;
};

template <class T, class A = alloc<T> >
struct vector : vbase<T,A>
{
    typedef T* iterator;
    typedef vbase<T,A> base;
    
    vector() {}
    
    vector(std::size_t n)
        : base(n)
    {
        for (;this->end_ != this->cap_;)
        {
            new (this->end_++) T;
        }
    }

    vector(std::size_t n, T const& x)
        : base(n)
    {
        for (;this->end_ != this->cap_;)
        {
            new (this->end_++) T(x);
        }
    }

    vector(vector const& rhs)
        : base( rhs )
    {
        for (T* p = rhs.begin_, *e = rhs.end_; p != e; ++p, ++this->end_)
            new (this->end_) T(*p);
    }
    
    vector(vector&& rhs)
        : base( std::move(rhs) )
    {
    }

    vector& operator=(vector const& rhs)
    {
        base& dst = *this;
        dst = rhs;
        return *this;
    }

    vector& operator=(vector&& rhs)
    {
#ifdef CLEAR_SWAP
        this->clear();
        this->swap(rhs);
#elif defined SWAP_TMP 
        vector(std::move(rhs)).swap(*this);
#else 
        base& dst = *this;
        dst = std::move(rhs);
#endif
        return *this;
    }
};

// A loop-unrolled swap of two arrays of length N
template <std::size_t N, class T>
inline void
array_swap(T* lhs, T* rhs)
{
    if (N == 0) return;
    
    if (N == 1)
    {
        using std::swap;
        swap(*lhs, *rhs);
    }
    else
    {
        array_swap<N/2>(lhs, rhs);
        array_swap<N-(N/2)>(lhs + N/2, rhs + N/2);
    }
}

// A loop-unrolled move of two arrays of length N
template <std::size_t N, class T>
inline void
array_move(T* lhs, T* rhs)
{
    if (N == 0) return;
    
    if (N == 1)
    {
        *lhs = std::move(*rhs);
    }
    else
    {
        array_move<N/2>(lhs, rhs);
        array_move<N-(N/2)>(lhs + N/2, rhs + N/2);
    }
}

template <class T, std::size_t N>
struct array
{
    typedef T* iterator;
    
    T const* begin() const { return storage; }
    T const* end() const { return begin() + N; }
    
    T* begin() { return storage; }
    T* end() { return begin() + N; }

    array& operator=(array const& rhs)
    {
        std::copy(rhs.begin(), rhs.end(), this->begin());
        return *this;
    }
    
    array& operator=(array&& rhs)
    {
#ifdef SWAP_TMP
        array(std::move(rhs)).swap(*this);
#else
        
# ifdef SIMPLE_MINDED_MOVE
        for (std::size_t i = 0; i < N; ++i)
            (*this)[i] = std::move(rhs[i]);
# elif defined(LOOP_UNROLLED_MOVE)
        array_move<N>(storage, rhs.storage);
# else 
        // libstdc++ (the GCC standard lib) has optimizations that can
        // make this up to 2x faster than the simple-minded
        // implementation, e.g. when possible this call becomes a
        // memmove
        std::copy(std::make_move_iterator(rhs.begin()), std::make_move_iterator(rhs.end()), begin());
# endif
        
#endif
        return *this;
    }

    void swap(array& rhs)
    {
#ifdef SIMPLE_MINDED_MOVE
        for (std::size_t i = 0; i < N; ++i)
        {
            using std::swap;
            swap((*this)[i], rhs[i]);
        }
#else
        // Completely unrolling this loop gives a 30% speedup
        array_swap<N>(storage, rhs.storage);
#endif 
    }

    std::size_t size() const { return N; }
    
    friend inline void swap(array& lhs, array& rhs)
    {
        lhs.swap(rhs);
    }
    
    friend inline bool operator<(array const& v1, array const& v2)
    {
        return std::lexicographical_compare(v1.begin(), v1.end(), v2.begin(), v2.end());
    }

    T& operator[](std::size_t x) { return storage[x]; }
    T const& operator[](std::size_t x) const { return storage[x]; }
    
    T storage[N];
};

int main(int argc, char* argv[])
{
    unsigned const length = argc > 1 ? std::atoi(argv[1]) : 1000;
    unsigned const reps = argc > 2 ? std::atoi(argv[2]) : 1000;
    unsigned elen = argc > 3 ? std::atoi(argv[3]) : 
#ifdef ARRAY
        100;
    typedef array<int,100> elt;
#else
    5;
    typedef vector<int> elt;
#endif 
    
    std::cout << "allocate " << length << std::endl;
    
    vector<elt> x(length, elt());

    std::cout << "initialize " << x.size() << std::endl;
    typedef vector<elt>::iterator it;
    
    for (it p = x.begin(), e = x.end(); p != e; ++p)
    {
#ifndef ARRAY
        p->resize(elen);
#endif
        for (unsigned i = 0; i < p->size(); ++i)
        {
            (*p)[i] = rand();
        }
    }

    std::cout << "work" << std::endl;
    it b = x.begin();
    it e = x.end();
    
    for (unsigned i = 0; i < reps; ++i)
    {
        for (it p = b; p != e; ++p)
            std::rotate(b, p, e);
    }
    
    std::cout << "done" << std::endl;
}

