// B-tree map of numbers to numbers -*- c++ -*-

#ifdef __GNUC__
# pragma implementation
#endif // __GNUC__
#include "BTree.h"
#include <assert.h>
#include <string.h>
#include <stdlib.h>

/** @file BTree.C
 * B-tree map of numbers to numbers
 */

/* Copyright © 2000-2003 Marko Mäkelä (msmakela@tcs.hut.fi).

   This file is part of MARIA, a reachability analyzer and model checker
   for high-level Petri nets.

   MARIA is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   MARIA is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   The GNU General Public License is often shipped with GNU software, and
   is generally kept in a file called COPYING or LICENSE.  If you do not
   have a copy of the license, write to the Free Software Foundation,
   59 Temple Place, Suite 330, Boston, MA 02111 USA. */

/** Size of a b-tree page, in bytes: a page consists of BTREE_SIZE items */
#define BTREE_PAGE_SIZE (sizeof (BTree::item_t) * BTREE_SIZE)

/** Flag for leaf items: the most significant bit of an unsigned int.
 * It is kept in item 0 of a page together with the key count;
 * keys () masks it off and isLeaf () tests it.
 */
#define BTREE_LEAF ~((~0u) >> 1)

/** Extract the key count from the header item of a B-tree node
 * @param item	the header item (item 0) of the node
 * @return	the item with the leaf flag masked off
 */
inline static unsigned
keys (const BTree::item_t item)
{
  // every bit except the leaf flag belongs to the count
  const unsigned countMask = ~BTREE_LEAF;
  return item & countMask;
}
/** Tell whether the header item of a B-tree node marks a leaf node
 * @param item	the header item (item 0) of the node
 * @return	true if the leaf flag is set
 */
inline static bool
isLeaf (const BTree::item_t item)
{
  return (item & BTREE_LEAF) != 0;
}

#ifdef USE_MMAP
/** The root page: the first page at the start of the mapped file */
# define myRoot reinterpret_cast<BTree::item_t*>(myFile.addr)
/** Number of whole pages, computed from the length of the file */
# define myNumPages (myFile.len / BTREE_PAGE_SIZE)

/** Obtain a pointer to a page inside the mapped file
 * @param f	the file
 * @param page	page number (must lie within the file length)
 * @return	the address of the page
 */
inline static BTree::item_t*
mapPage (const file_t& f,
	 unsigned page)
{
  assert (page < f.len / BTREE_PAGE_SIZE);
  BTree::item_t* const firstPage = reinterpret_cast<BTree::item_t*>(f.addr);
  // pages are laid out back to back, BTREE_SIZE items each
  return &firstPage[page * BTREE_SIZE];
}
#endif // USE_MMAP
/** Read a page from the file
 * @param f	the file
 * @param page	page number
 * @param buf	(output) the page buffer
 */
inline static void
readPage (const file_t& f,
	  unsigned page,
	  BTree::item_t* buf)
{
#ifdef USE_MMAP
  assert (page < f.len / BTREE_PAGE_SIZE);
  memcpy (buf, static_cast<char*>(f.addr) + page * BTREE_PAGE_SIZE,
	  BTREE_PAGE_SIZE);
#else // USE_MMAP
  fseek (f, page * BTREE_PAGE_SIZE, SEEK_SET);
  fread (buf, BTREE_PAGE_SIZE, 1, f);
#endif // USE_MMAP
}

/** Write a page without extending the file
 *
 * With USE_MMAP the page is copied into the mapped file.  Otherwise
 * it is written through stdio; a failed seek or write is reported and
 * the process is aborted, like extendPage () does for a failed write.
 * @param f	the file
 * @param page	page number
 * @param buf	the page buffer, BTREE_PAGE_SIZE bytes
 */
inline static void
writePage (const file_t& f,
	   unsigned page,
	   const BTree::item_t* buf)
{
#ifdef USE_MMAP
  assert (page < f.len / BTREE_PAGE_SIZE);
  memcpy (static_cast<char*>(f.addr) + page * BTREE_PAGE_SIZE, buf,
	  BTREE_PAGE_SIZE);
#else // USE_MMAP
  if (fseek (f, page * BTREE_PAGE_SIZE, SEEK_SET)) {
    perror ("BTree: fseek");
    abort ();
  }
  if (1 != fwrite (buf, BTREE_PAGE_SIZE, 1, f)) {
    perror ("BTree: fwrite");
    abort ();
  }
#endif // USE_MMAP
}

/** Write a page, extending the file if needed
 *
 * With USE_MMAP the page must lie within the file or start exactly at
 * its current end; in the latter case the file length grows by one
 * page and, if the allocation is exceeded, the allocation is doubled
 * (by realloc () under NO_MMAP, otherwise by ftruncate () and a fresh
 * mmap ()).  Growing may change f.addr, so pointers obtained from the
 * old f.addr must not be used after this call.
 * Without USE_MMAP the page is written through stdio.
 * Allocation and write failures are reported with perror () and abort.
 * @param f	the file
 * @param page	page number
 * @param buf	the page buffer
 */
inline static void
extendPage (file_t& f,
	    unsigned page,
	    const BTree::item_t* buf)
{
#ifdef USE_MMAP
  long offset = page * BTREE_PAGE_SIZE;
  // only existing pages or the page right after the last one are allowed
  assert (offset <= f.len);
  if (offset == f.len) {
    // appending: account for the new page and grow the storage if needed
    f.len += BTREE_PAGE_SIZE;
    if (f.len > f.alloc) {
      // NOTE(review): a single doubling presumes that f.alloc is nonzero
      // and already covers the old length -- confirm how file_t is set up
# ifdef NO_MMAP
      f.alloc *= 2;
      if (!(f.addr = realloc (f.addr, f.alloc))) {
	perror ("BTree: realloc");
	abort ();
      }
# else // NO_MMAP
      // drop the old mapping, enlarge the file, and map it again
      if (f.addr)
	munmap (f.addr, f.alloc);
      if (ftruncate (f.fd, f.alloc *= 2)) {
	perror ("BTree: ftruncate");
	abort ();
      }
      if ((f.addr =
#  ifdef __sun
	   (caddr_t)
#  endif // __sun
	   mmap (0, f.alloc, PROT_READ | PROT_WRITE, MAP_SHARED, f.fd, 0)) ==
	  reinterpret_cast<void*>(MAP_FAILED)) {
	perror ("BTree: mmap");
	abort ();
      }
# endif // NO_MMAP
    }
  }
  // copy the page into the (possibly relocated) storage
  memcpy (static_cast<char*>(f.addr) + page * BTREE_PAGE_SIZE, buf,
	  BTREE_PAGE_SIZE);
#else // USE_MMAP
  // the result of fseek () is not checked; only a failed write is fatal
  fseek (f, page * BTREE_PAGE_SIZE, SEEK_SET);
  if (1 != fwrite (buf, BTREE_PAGE_SIZE, 1, f)) {
    perror ("fwrite");
    abort ();
  }
#endif
}

/** Locate the run of keys equal to a given key within a B-tree node
 *
 * The keys of a node occupy items 1 to keys (*page) in ascending order.
 * When the key is absent, low is left at the index where the key would
 * have to be placed (and high at low - 1).
 * @param page		a B-tree node
 * @param key		the key to be sought
 * @param low		(output) the lower bound, inclusive
 * @param high		(output) the upper bound, inclusive
 * @return		true if the key was found
 */
static bool
searchBounds (const BTree::item_t* page, BTree::item_t key,
	      unsigned& low, unsigned& high)
{
  const unsigned numKeys = keys (*page);
  low = 1;
  high = numKeys;
  if (numKeys == 0)
    return false;
  assert (high >= low && high < BTREE_SIZE);
  // bisect until a matching key is hit or the interval becomes empty
  do {
    const unsigned mid = (low + high) / 2;
    const BTree::item_t probe = page[mid];
    if (probe == key) {
      // grow [mid, mid] in both directions over all equal neighbours
      low = high = mid;
      while (low > 1 && page[low - 1] == key)
	low--;
      while (high < numKeys && page[high + 1] == key)
	high++;
      return true;
    }
    if (probe < key)
      low = mid + 1;
    else
      high = mid - 1;	// mid >= 1, so this cannot wrap around
  } while (low <= high);
  return false;
}

/** Collect the values stored for a key in a leaf node
 *
 * The value belonging to the key at item k is stored at item
 * BTREE_SIZE / 2 + k of the same page.
 * @param page		a leaf node
 * @param key		the key to be sought
 * @return		a new[]-allocated array of the values
 *			(item 0: amount of the values), or 0 if not found
 */
static BTree::item_t*
searchLeaf (const BTree::item_t* page, BTree::item_t key)
{
  assert (isLeaf (*page));
  unsigned first, last;
  if (!searchBounds (page, key, first, last))
    return 0;
  // one slot for the count, followed by the values themselves
  const unsigned count = last - first + 1;
  BTree::item_t* const values = new BTree::item_t[1 + count];
  values[0] = count;
  memcpy (&values[1], page + (BTREE_SIZE / 2 + first),
	  count * sizeof *values);
  return values;
}

#ifndef USE_MMAP
/** Search the tree, reusing the given page buffer
 *
 * While descending through nodes that do not contain the key, the
 * child page is read into the buffer passed as page, overwriting it.
 * When an internal node contains the key, every child adjacent to the
 * matching keys is searched recursively and the results are merged.
 * @param page		node to start the search from (buffer is clobbered)
 * @param key		the key to be sought
 * @param file		the B-tree file
 * @return		a new[]-allocated array of the values
 *			(item 0: amount of the values), or 0 if not found
 */
static BTree::item_t*
search (BTree::item_t* page, BTree::item_t key, const file_t& file)
{
  for (;;) {
    if (isLeaf (*page))
      return searchLeaf (page, key);
    unsigned low, high;
    if (!searchBounds (page, key, low, high)) {
      // not in this node: descend to the child where the key would belong
      assert (low <= keys (*page) + 1);
      assert (page[(BTREE_SIZE / 2 - 1) + low] > 0);
      readPage (file, page[(BTREE_SIZE / 2 - 1) + low], page);
      continue;
    }
    BTree::item_t* result = 0;
    // Keys low..high match.  Equal keys may reside in the children on
    // both sides of each of them, so the child to the right of key
    // "high" must be visited as well (hence high++), exactly as in the
    // const overload of this function.
    for (high++; high >= low; high--) {
      BTree::item_t p[BTREE_SIZE];
      readPage (file, page[(BTREE_SIZE / 2 - 1) + high], p);
      BTree::item_t* r = search (p, key, file);
      if (r) {
	if (!result) result = r;
	else {
	  // concatenate the values found so far with the new ones
	  BTree::item_t* r2 = new BTree::item_t[*result + *r + 1];
	  *r2 = *result + *r;
	  memcpy (r2 + 1, result + 1, *result * sizeof *result);
	  memcpy (r2 + 1 + *result, r + 1, *r * sizeof *r);
	  delete[] result;
	  delete[] r;
	  result = r2;
	}
      }
    }
    return result;
  }
}
#endif // !USE_MMAP

/** Search the tree without modifying the starting node
 *
 * Descends while the current internal node does not contain the key.
 * When an internal node contains the key, all children adjacent to the
 * matching keys are searched and their results are concatenated.
 * @param page		node to start the search from
 * @param key		the key to be sought
 * @param file		the B-tree file
 * @return		a new[]-allocated array of the values
 *			(item 0: amount of the values), or 0 if not found
 */
static BTree::item_t*
search (const BTree::item_t* page, BTree::item_t key, const file_t& file)
{
#ifndef USE_MMAP
  // p holds the node being descended into, p2 the child being searched
  BTree::item_t p[BTREE_SIZE], p2[BTREE_SIZE];
#endif // !USE_MMAP
  unsigned low, high;
  // walk down as long as the key is absent from an internal node
  while (!isLeaf (*page) && !searchBounds (page, key, low, high)) {
    assert (low <= keys (*page) + 1);
    assert (page[(BTREE_SIZE / 2 - 1) + low] > 0);
#ifdef USE_MMAP
    page = mapPage (file, page[(BTREE_SIZE / 2 - 1) + low]);
#else // USE_MMAP
    readPage (file, page[(BTREE_SIZE / 2 - 1) + low], p);
    page = p;
#endif // USE_MMAP
  }
  if (isLeaf (*page))
    return searchLeaf (page, key);

  // keys low..high match: visit the children around them, right to left
  BTree::item_t* found = 0;
  for (unsigned slot = high + 1; slot >= low; slot--) {
#ifdef USE_MMAP
    BTree::item_t* sub = search
      (mapPage (file, page[(BTREE_SIZE / 2 - 1) + slot]), key, file);
#else // USE_MMAP
    readPage (file, page[(BTREE_SIZE / 2 - 1) + slot], p2);
    BTree::item_t* sub = search (p2, key, file);
#endif // USE_MMAP
    if (!sub)
      continue;
    if (!found) {
      found = sub;
      continue;
    }
    // append the new values to the ones collected so far
    BTree::item_t* merged = new BTree::item_t[*found + *sub + 1];
    *merged = *found + *sub;
    memcpy (merged + 1, found + 1, *found * sizeof *found);
    memcpy (merged + 1 + *found, sub + 1, *sub * sizeof *sub);
    delete[] found;
    delete[] sub;
    found = merged;
  }
  return found;
}
/** Split a full child node in two and register the new sibling
 *
 * Page layout relied upon here: item 0 holds the key count (plus
 * BTREE_LEAF for leaves) and the keys start at item 1.  In an internal
 * node the child page numbers start at item BTREE_SIZE / 2; in a leaf
 * the value of the key at item k is stored at item BTREE_SIZE / 2 + k.
 *
 * The child keeps its lower keys and the upper keys are moved to a new
 * node, which is appended to the file.  The median key is copied to
 * slot i of the parent, and the page number of the new node is stored
 * in the parent right after that of the child.  A leaf child retains
 * the median key; an internal child does not count it any more.
 *
 * With USE_MMAP, parent and child are modified in place and only the
 * new node is written explicitly; as extendPage () may replace the
 * storage of the file, the parent and child pointers must not be
 * used after this function returns.
 * @param parent	the parent node
 * @param child		the child node
 * @param i		index of the child node in the parent node
 * @param parentpage	page number of the parent node (without USE_MMAP)
 * @param childpage	page number of the child node (without USE_MMAP)
 * @param numpages	number of pages in the B-tree (without USE_MMAP);
 *			incremented for the new node
 * @param file		the B-tree file
 */
static void
split (BTree::item_t* parent,
       BTree::item_t* child,
       unsigned i,
#ifndef USE_MMAP
       unsigned parentpage,
       unsigned childpage,
       unsigned& numpages,
#endif // !USE_MMAP
       file_t& file)
{
  // the child must be full, and i must denote a valid 1-based slot
  assert (keys (*child) == BTREE_SIZE / 2 - 1 &&
	  i && i - 1 <= keys (*parent));
  BTree::item_t child2[BTREE_SIZE];
  memset (child2, 0, BTREE_PAGE_SIZE);
  // the keys above the median become the keys of the new node
  memcpy (child2 + 1, child + (BTREE_SIZE / 4 + 1),
	  (BTREE_SIZE / 4 - 1) * sizeof *child);
  // so does the last quarter of the page (child numbers or values)
  memcpy (child2 + BTREE_SIZE / 2, child + (BTREE_SIZE * 3 / 4),
	  (BTREE_SIZE / 4) * sizeof *child);
  if (isLeaf (*child)) {
    // the median key stays in the leaf in addition to going to the parent
    child[0] = (BTREE_SIZE / 4) | BTREE_LEAF;
    child2[0] = (BTREE_SIZE / 4 - 1) | BTREE_LEAF;
  }
  else
    child2[0] = child[0] = BTREE_SIZE / 4 - 1;
  if (i <= keys (*parent)) {
    // make room in the parent: shift the child numbers, then the keys
    memmove (parent + i + (BTREE_SIZE / 2 + 1),
	     parent + i + BTREE_SIZE / 2,
	     (keys (*parent) + 1 - i) * sizeof *parent);
    memmove (parent + i + 1, parent + i,
	     (keys (*parent) + 1 - i) * sizeof *parent);
  }
  // the median key separates the child from its new sibling
  parent[i] = child[BTREE_SIZE / 4];
  parent[0]++;
  // wipe the slots of the child whose contents were moved away
  memset (child + (BTREE_SIZE / 4 + 1), 0,
	  (BTREE_SIZE / 4 - 1) * sizeof *child);
  if (isLeaf (*child))
    memset (child + (BTREE_SIZE * 3 / 4 + 1), 0,
	    (BTREE_SIZE / 4 - 1) * sizeof *child);
  else
    memset (child + (BTREE_SIZE * 3 / 4), 0,
	    (BTREE_SIZE / 4) * sizeof *child);
#ifdef USE_MMAP
  // the new node becomes the last page; its number is stored in the
  // parent before extendPage () possibly relocates the storage
  extendPage (file, parent[BTREE_SIZE / 2 + i] = file.len / BTREE_PAGE_SIZE,
	      child2);
#else // USE_MMAP
  // write back all three nodes; the new node gets the next free page
  extendPage (file, childpage, child);
  extendPage (file, parent[BTREE_SIZE / 2 + i] = numpages++, child2);
  writePage (file, parentpage, parent);
#endif // USE_MMAP
}

/** Insert to a non-full node
 * @param pagenbr	page number of the node to insert to
 * @param key		the key
 * @param value		the value
 * @param root		the root page
 * @param numpages	number of pages in the B-tree
 * @param file		the B-tree file
 */
static void
insert (unsigned pagenbr,
	BTree::item_t key,
	BTree::item_t value,
#ifndef USE_MMAP
	BTree::item_t* root,
	unsigned& numpages,
#endif // !USE_MMAP
	file_t& file)
{
  for (;;) {
#ifdef USE_MMAP
    BTree::item_t* page = mapPage (file, pagenbr);
#else // USE_MMAP
    BTree::item_t page[BTREE_SIZE];
    if (!pagenbr)
      memcpy (page, root, BTREE_PAGE_SIZE);
    else
      readPage (file, pagenbr, page);
#endif // USE_MMAP

    unsigned i = keys (*page);
    assert (i < BTREE_SIZE / 2 - 1);
    if (isLeaf (*page)) {
      // to do: apply binary search
      while (i && key <= page[i]) {
	page[i + 1] = page[i];
	page[i + (BTREE_SIZE / 2 + 1)] = page[i + (BTREE_SIZE / 2)];
	i--;
      }
      page[i + 1] = key;
      page[i + (BTREE_SIZE / 2 + 1)] = value;
      page[0]++;
#ifndef USE_MMAP
      writePage (file, pagenbr, page);
      if (!pagenbr)
	memcpy (root, page, BTREE_PAGE_SIZE);
#endif // USE_MMAP
      return;
    }

    // to do: apply binary search
    while (i && key <= page[i]) i--;
    unsigned childpagenbr = page[(BTREE_SIZE / 2) + i++];
    assert (childpagenbr > 0);
#ifdef USE_MMAP
    BTree::item_t* child = mapPage (file, childpagenbr);
#else // USE_MMAP
    BTree::item_t child[BTREE_SIZE];
    readPage (file, childpagenbr, child);
#endif // USE_MMAP
    if (keys (*child) == (BTREE_SIZE / 2 - 1)) {
      split (page, child, i,
#ifndef USE_MMAP
	     pagenbr, childpagenbr, numpages,
#endif // !USE_MMAP
	     file);
#ifdef USE_MMAP
      page = mapPage (file, pagenbr);
#else // USE_MMAP
      if (!pagenbr)
	memcpy (root, page, BTREE_PAGE_SIZE);
#endif // USE_MMAP
      if (key > page[i]) i++;
      pagenbr = page[(BTREE_SIZE / 2 - 1) + i];
      assert (pagenbr > 0);
    }
    else
      pagenbr = childpagenbr;
  }
}

/** Set up a B-tree on a file
 *
 * If the file does not provide a complete first page, an empty root
 * leaf is created and written as page 0.
 * NOTE(review): without USE_MMAP the page count always starts at 1,
 * whatever the size of the file -- confirm that files holding more
 * than the root page are never reopened.
 * @param file	the B-tree file
 */
BTree::BTree (file_t file) :
  myFile (file)
#ifndef USE_MMAP
  , myNumPages (1)
#endif // !USE_MMAP
{
#ifdef USE_MMAP
  if (myFile.len >= long (BTREE_PAGE_SIZE))
    return;	// the root page exists already
  item_t emptyRoot[BTREE_SIZE];
  memset (emptyRoot, 0, sizeof emptyRoot);
  *emptyRoot = BTREE_LEAF;
  extendPage (myFile, 0, emptyRoot);
#else // USE_MMAP
  if (1 == fread (myRoot, BTREE_PAGE_SIZE, 1, myFile))
    return;	// the root page was read from the file
  memset (myRoot, 0, BTREE_PAGE_SIZE);
  *myRoot = BTREE_LEAF;
  extendPage (myFile, 0, myRoot);
#endif // USE_MMAP
}

/** Release the storage of the B-tree and close the file
 *
 * With mmap (), the file is cut back from the allocated size to the
 * length actually used before it is closed.
 */
BTree::~BTree ()
{
#ifndef USE_MMAP
  fclose (myFile);
#elif defined NO_MMAP
  if (myFile.addr != 0)
    free (myFile.addr);
#else // mmap ()
  if (myFile.addr != 0)
    munmap (myFile.addr, myFile.alloc);
  if (myFile.fd == -1)
    return;
  ftruncate (myFile.fd, myFile.len);
  close (myFile.fd);
#endif // !USE_MMAP
}

/** Look up the values stored for a key, starting from the root page
 * @param key	the key to be sought
 * @return	whatever the file-level search () returns for the root:
 *		the values (item 0: amount of the values), or 0
 */
BTree::item_t*
BTree::search (item_t key) const
{
  item_t* const values = ::search (myRoot, key, myFile);
  return values;
}

/** Insert a key-value pair to the tree
 *
 * If the root is full, its contents are first moved to a new page,
 * the root is turned into an empty internal node having that page as
 * its only child, and the child is split.  After that, the insertion
 * proceeds from the root, which is then known not to be full.
 * @param key	the key
 * @param value	the value
 */
void
BTree::insert (item_t key, item_t value)
{
  const bool rootIsFull = keys (*myRoot) == BTREE_SIZE / 2 - 1;
  if (rootIsFull) {
    // save the old root and start over with an empty internal node
    item_t oldRoot[BTREE_SIZE];
    memcpy (oldRoot, myRoot, BTREE_PAGE_SIZE);
    memset (myRoot, 0, BTREE_PAGE_SIZE);
    myRoot[0] = 0;
#ifdef USE_MMAP
    // append the old root as the last page, then split it in place
    const unsigned last = myNumPages;
    myRoot[BTREE_SIZE / 2] = last;
    extendPage (myFile, last, oldRoot);
    ::split (myRoot, mapPage (myFile, last), 1, myFile);
#else // USE_MMAP
    // reserve a page number for the old root; split () writes it out
    const unsigned oldRootPage = myNumPages++;
    myRoot[BTREE_SIZE / 2] = oldRootPage;
    ::split (myRoot, oldRoot, 1, 0, oldRootPage, myNumPages, myFile);
#endif // USE_MMAP
  }
  ::insert (0, key, value,
#ifndef USE_MMAP
	    myRoot, myNumPages,
#endif // !USE_MMAP
	    myFile);
}

/** Remove all entries from the tree
 *
 * The root page is reset to an empty leaf and the tree is reduced to
 * that single page.  With USE_MMAP the used length of the file is set
 * to one page; otherwise the page count is reset and the root page is
 * written to the file.
 */
void
BTree::clear ()
{
#ifdef USE_MMAP
  // the root page must exist before it is overwritten
  assert (myFile.len >= long (BTREE_PAGE_SIZE));
#endif // USE_MMAP
  // wipe the whole page, not just sizeof (size_t) bytes of it
  memset (myRoot, 0, BTREE_PAGE_SIZE);
  myRoot[0] = BTREE_LEAF;
#ifdef USE_MMAP
  myFile.len = BTREE_PAGE_SIZE;
#else // USE_MMAP
  myNumPages = 1;
  ::extendPage (myFile, 0, myRoot);
#endif // USE_MMAP
}
