/* Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include <udm_config.h>

#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <sys/types.h>
#ifdef   HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_WINSOCK_H
#include <winsock.h>
#endif
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#ifdef HAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#ifdef HAVE_ARPA_NAMESER_H
#include <arpa/nameser.h>
#endif
#ifdef HAVE_RESOLV_H
#include <resolv.h>
#endif
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif
#ifdef HAVE_ZLIB
#include <zlib.h>
#endif

#include "udm_store.h"
#include "udm_services.h"
#include "udm_xmalloc.h"
#include "udm_hash.h"
#include "udm_utils.h"
#include "udm_log.h"
#include "udm_vars.h"
#include "udm_parsehtml.h"
#include "udm_unicode.h"
#include "udm_unidata.h"
#include "udm_searchtool.h"
#include "udm_sgml.h"
#include "udm_sqldbms.h"
#include "udm_mutex.h"


/** Hand a document body over to the stored daemon.
    The request written to the stored socket is: the command byte 'S',
    the document's URL_ID, the body length and the body itself.
    stored answers with a single byte; 'O' means the copy was accepted.
    Without zlib support this is a no-op that reports success.
    \return UDM_OK if successful, UDM_ERROR on error
 */

__C_LINK int __UDMCALL UdmStoreDoc(UDM_AGENT *Agent, UDM_DOCUMENT *Doc) {

#ifdef HAVE_ZLIB
  const char *cmd = "S\0";
  char reply[8];
  /* body length = whole buffer minus everything in front of the content */
  size_t body_len = Doc->Buf.size - (Doc->Buf.content - Doc->Buf.buf);
  udmhash32_t url_id = UdmVarListFindInt(&Doc->Sections, "URL_ID", 0);
  /* the URL_ID selects which configured database's stored socket is used */
  size_t db_index = ((size_t)url_id) % Agent->Conf->dbl.nitems;
  int sd = Agent->Conf->dbl.db[db_index].stored_sd;
  int status = UDM_ERROR;

  if (sd <= 0) {
    return UDM_ERROR;
  }

  UDM_GETLOCK(Agent, UDM_LOCK_STORED);

  /* FIXME: add checking of send() results */
  UdmSend(sd, cmd, 1, 0);
  UdmSend(sd, &url_id, sizeof(url_id), 0);
  UdmSend(sd, &body_len, sizeof(body_len), 0);
  UdmSend(sd, Doc->Buf.content, body_len, 0);

  if ((UdmRecvall(sd, reply, 1) >= 0) && (reply[0] == 'O')) {
    status = UDM_OK;
  }

  UDM_RELEASELOCK(Agent, UDM_LOCK_STORED);
  return status;

#else
  return UDM_OK;
#endif

}


/** Fetch the cached copy of a document from stored.
    Sends the command byte 'G' followed by the URL_ID, then reads the
    content length and the content itself into Doc->Buf.buf.
    If Doc->Buf.buf is NULL a buffer of content length + 1 bytes is
    allocated here; otherwise the caller's buffer is used as is.
    \return 0 on success (Doc->Buf.size/content are set), -1 otherwise
 */

__C_LINK int __UDMCALL UdmUnStoreDoc(UDM_AGENT *Agent, UDM_DOCUMENT *Doc) {

#ifdef HAVE_ZLIB
  const char *cmd = "G\0";
  udmhash32_t url_id;
  size_t body_len = 0;
  size_t db_index;
  ssize_t got = 0;
  int sd;
  int fetched = 0;

  url_id = UdmVarListFindInt(&Doc->Sections, "URL_ID", 0);
  Doc->Buf.size = 0;
  db_index = ((size_t)url_id) % Agent->Conf->dbl.nitems;

  sd = Agent->Conf->dbl.db[db_index].stored_sd;
  if (sd < 0) {
    return -1;
  }

  UDM_GETLOCK(Agent, UDM_LOCK_STORED);

  /* FIXME: add send() results checking */
  UdmSend(sd, cmd, 1, 0);
  UdmSend(sd, &url_id, sizeof(url_id), 0);

  /* a zero length answer means there is nothing to retrieve */
  if ((UdmRecvall(sd, &body_len, sizeof(body_len)) >= 0) && (body_len != 0)) {
    if (Doc->Buf.buf == NULL) {
      Doc->Buf.buf = (char*) malloc(body_len + 1);
    }
    if (Doc->Buf.buf != NULL) {
      got = UdmRecvall(sd, Doc->Buf.buf, body_len);
      fetched = (got >= 0);
    }
  }

  UDM_RELEASELOCK(Agent, UDM_LOCK_STORED);

  if (!fetched) {
    return -1;
  }

  Doc->Buf.buf[got] = '\0';
  Doc->Buf.size = got;
  Doc->Buf.content = Doc->Buf.buf;
  return 0;

#else
  return -1;
#endif
}

/** Ask stored to delete the cached copy of a document.
    Sends the command byte 'D' followed by the document's URL_ID;
    no reply is read.
    \return 0 once the request has been written, -1 if the stored socket
            is not available or zlib support is compiled out
 */

__C_LINK int __UDMCALL UdmStoreDeleteDoc(UDM_AGENT *Agent, UDM_DOCUMENT *Doc) {

#ifdef HAVE_ZLIB
  const char *cmd = "D\0";
  udmhash32_t url_id = UdmVarListFindInt(&Doc->Sections, "URL_ID", 0);
  size_t db_index = ((size_t)url_id) % Agent->Conf->dbl.nitems;
  int sd = Agent->Conf->dbl.db[db_index].stored_sd;

  if (sd < 0) {
    return -1;
  }

  UDM_GETLOCK(Agent, UDM_LOCK_STORED);
  UdmSend(sd, cmd, 1, 0);
  UdmSend(sd, &url_id, sizeof(url_id), 0);
  UDM_RELEASELOCK(Agent, UDM_LOCK_STORED);

  return 0;

#else
  return -1;
#endif

}

/** Request a check-up of the stored database(s).
    Sends the command byte 'C' to the stored socket of every configured
    database. The walk stops at the first database without a valid
    stored socket.
    \return 0 if the command was sent to all databases, -1 otherwise
            (also when zlib support is compiled out)
 */

__C_LINK int __UDMCALL UdmStoreCheckUp(UDM_AGENT *Agent) {

#ifdef HAVE_ZLIB
  const char *hello = "C\0";
  int s;
  int rc = 0;
  size_t i, dbfrom = 0, dbto =  Agent->Conf->dbl.nitems;

  UDM_GETLOCK(Agent, UDM_LOCK_STORED);
  for (i = dbfrom; i < dbto; i++) {
    if ((s = Agent->Conf->dbl.db[i].stored_sd) < 0) {
      /* Leave through the common exit so the lock is always released. */
      rc = -1;
      break;
    }

    UdmSend(s, hello, 1, 0);
  }
  UDM_RELEASELOCK(Agent, UDM_LOCK_STORED);
  return rc;

#else
  return -1;
#endif

}

/* ** ** ** */

/* Pull the next chunk of a document from stored and append it to t->Content.
   The buffer is grown by one UDM_DOCHUNKSIZE, chunk number t->chunks is
   requested, and every pointer the tokenizer keeps into the old buffer is
   relocated if the buffer moved. A chunk shorter than UDM_DOCHUNKSIZE (or
   any failure) ends the dialogue: a zero chunk number is sent to stored and
   t->finished is set. */
static void UdmStoredFetchChunk(UDM_HTMLTOK *t) {
  char *OldContent = t->Content;
  char *NewContent;
  size_t ChunkSize = 0, base, i;
  int z = 0;

  NewContent = (char*)realloc(t->Content, (size_t)(t->chunks + 1) * UDM_DOCHUNKSIZE + 1);
  if (NewContent == NULL) {
    /* Out of memory: keep the data we already have and stop asking for more. */
    UdmSend(t->socket, &z, sizeof(z), 0);
    t->finished = 1;
    return;
  }
  t->Content = NewContent;
  t->chunks++;
  base = (size_t)(t->chunks - 1) * UDM_DOCHUNKSIZE;

  UdmSend(t->socket, &t->chunks, sizeof(t->chunks), 0);
  if ((UdmRecvall(t->socket, &ChunkSize, sizeof(ChunkSize)) < 0) ||
      (ChunkSize > (size_t)UDM_DOCHUNKSIZE)) {
    /* Read error, or a size that would not fit the space just added. */
    ChunkSize = 0;
  } else if ((ChunkSize > 0) &&
             (UdmRecvall(t->socket, &t->Content[base], ChunkSize) < 0)) {
    ChunkSize = 0;
  }

  if (ChunkSize != UDM_DOCHUNKSIZE) {
    /* Short chunk: this was the last one, tell stored we are done. */
    UdmSend(t->socket, &z, sizeof(z), 0);
    t->finished = 1;
  }
  t->Content[base + ChunkSize] = '\0';

  if (t->Content != OldContent) {
    /* realloc() moved the buffer: re-base everything that points into it. */
    t->e = t->Content + (t->e - OldContent);
    t->b = t->Content + (t->b - OldContent);
    t->s = t->Content + (t->s - OldContent);
    *(t->lt) = t->Content + (*(t->lt) - OldContent);
    for (i = 0; i < t->ntoks; i++) {
      t->toks[i].name = (t->toks[i].name) ? t->Content + (t->toks[i].name - OldContent) : NULL;
      t->toks[i].val = (t->toks[i].val) ? t->Content + (t->toks[i].val - OldContent) : NULL;
    }
  }
}

/* Tokenizer callback: advance the "b" cursor by one character, loading the
   next chunk from stored first when the cursor gets within 32 bytes of the
   end of the data received so far. */
static void UdmNextCharB_stored(void *d) {
  UDM_HTMLTOK *t = (UDM_HTMLTOK *)d;
  if (!t->finished && ((t->b - t->Content) > t->chunks * UDM_DOCHUNKSIZE - 32)) {
    UdmStoredFetchChunk(t);
  }
  (t->b)++;
}

/* Tokenizer callback: same as UdmNextCharB_stored, but for the "e" cursor. */
static void UdmNextCharE_stored(void *d) {
  UDM_HTMLTOK *t = (UDM_HTMLTOK *)d;
  if (!t->finished && ((t->e - t->Content) > t->chunks * UDM_DOCHUNKSIZE - 32)) {
    UdmStoredFetchChunk(t);
  }
  (t->e)++;
}

/* Classify a Unicode code point for excerpt boundaries.
   Returns 0 for the code points treated as separators here
   (U+0020, U+0026, U+00A0, U+1680, U+2000..U+200B, U+202F, U+3000)
   and 1 for everything else. */
static int UdmUniNSpace(int c) {
  switch (c) {
    case 0x0020:
    case 0x0026:
    case 0x00A0:
    case 0x1680:
    case 0x202F:
    case 0x3000:
      return 0;
    default:
      break;
  }
  return ((c >= 0x2000) && (c <= 0x200B)) ? 0 : 1;
}

/* Scan the zero-terminated code point string s for the first occurrence of
   any query word from wwl that is not a stop word.
   c[i] holds the first code point of word i and len[i] the number of code
   points that follow it. A hit also requires the code point right after the
   word to be a separator (per UdmUniNSpace), the terminator, or below 0x30.
   Returns a pointer to the start of the match, or NULL if none is found. */
static int * UdmUniStrWWL(int *s, UDM_WIDEWORDLIST *wwl, int *c, size_t *len) {
  register size_t w;

  for (;;) {
    int lowered = UdmUniToLower(*s);
    int tail;

    s++;               /* s now points just past the candidate first code point */
    if (lowered == 0) {
      break;
    }

    for (w = 0; w < wwl->nwords; w++) {
      if (lowered != c[w]) continue;
      if (wwl->Word[w].origin == UDM_WORD_ORIGIN_STOP) continue;
      if (UdmUniStrNCaseCmp(s, &(wwl->Word[w].uword[1]), len[w]) != 0) continue;

      tail = s[len[w]];
      if ((UdmUniNSpace(tail) == 0) || (tail == 0) || (tail < 0x30)) {
        return s - 1;
      }
    }
  }
  return NULL;
}

/* Append the bytes [from, to) to the NUL-terminated text buffer buf of
   capacity cap whose current end is *pend; input that does not fit is
   silently truncated. The buffer stays NUL-terminated. */
static void UdmExcerptAddText(char *buf, size_t cap, char **pend, const char *from, const char *to) {
  size_t used = (size_t)(*pend - buf);
  size_t n = (size_t)(to - from);

  if (used + 1 >= cap) return;
  if (n > cap - 1 - used) n = cap - 1 - used;
  memcpy(*pend, from, n);
  *pend += n;
  **pend = '\0';
}

/* Bounded concatenation of zero-terminated code point strings: append src
   to dst, never writing past cap elements (terminator included). */
static void UdmExcerptUniCat(int *dst, size_t cap, int *src) {
  size_t used = UdmUniLen(dst);
  size_t n = UdmUniLen(src);

  if (used + 1 >= cap) return;
  if (n > cap - 1 - used) n = cap - 1 - used;
  memcpy(dst + used, src, n * sizeof(*dst));
  dst[used + n] = 0;
}

/** Build an excerpt of a document around the query words of a result.
    The document text is read chunk by chunk from stored (command 'E') when
    a stored socket is configured for the document's database, otherwise the
    "body" section of Doc is used. Text outside script, comment and style
    parts is converted from the document charset and searched for the words
    in Res->WWList; up to 64 code points on each side of a hit are copied,
    with "..." marking cut-off ends, until about `size` code points are
    collected. Newlines, carriage returns and tabs in the result are
    replaced by spaces.
    \return a malloc()ed string in the local charset that the caller must
            free, or NULL on error or when the document has no text
 */

__C_LINK char * __UDMCALL UdmExcerptDoc(UDM_AGENT *query, UDM_RESULT *Res, UDM_DOCUMENT *Doc, size_t size) {
  char *HDoc = NULL, *HEnd = NULL;
  const char *htok, *last;
  const char *lcharset;
  UDM_CHARSET *lcs = NULL, *dcs = NULL, *sys_int;
  UDM_HTMLTOK tag;
  int *start, *end, *prevend, *uni = NULL, ures, *p, *oi = NULL, dot[] = {0x2e, 0x2e, 0x2e, 0}, *np;
  char *os = NULL;
  int *c = NULL, s = -1;
  size_t *wlen = NULL, i, len, maxwlen = 0, ulen, prevlen, oicap;
  UDM_CONV dc_uni, uni_lc;
  const char *hello = "E\0";
  udmhash32_t rec_id;
  size_t ChunkSize = 0, DocSize, dbnum;
  char *Source = NULL;     /* private copy of the body when stored is not used */
  int content_owned = 0;   /* tag.Content was allocated here and must be freed */
  int stream_open = 0;     /* stored is waiting for chunk requests from us */

  /* A zero-sized excerpt cannot even hold its terminator. */
  if (size == 0) return NULL;

  if (query->Conf->lcs == NULL) {
    lcharset = UdmVarListFindStr(&query->Conf->Vars, "CS", "");
    if (lcharset == NULL || (!strcmp(lcharset, ""))) {
      lcharset = UdmVarListFindStr(&query->Conf->Vars, "LocalCharset", "iso-8859-1");
    }
    lcs = UdmGetCharSet(lcharset);
  } else {
    lcs = query->Conf->lcs;
  }
  dcs = UdmGetCharSet(UdmVarListFindStr(&Doc->Sections,"Charset","iso-8859-1"));

  if (!lcs || !dcs) return NULL;
  if (!(sys_int=UdmGetCharSet("sys-int")))
    return NULL;

  UdmConvInit(&dc_uni, dcs, sys_int, UDM_RECODE_HTML);
  UdmConvInit(&uni_lc,sys_int,lcs,UDM_RECODE_HTML);

  /* Per-word lookup tables: first code point and length of the remainder. */
  c = (int *) malloc(Res->WWList.nwords * sizeof(int));
  if (c == NULL) goto done;
  wlen = (size_t *) malloc(Res->WWList.nwords * sizeof(size_t));
  if (wlen == NULL) goto done;
  for (i = 0; i < Res->WWList.nwords; i++) {
    wlen[i] = Res->WWList.Word[i].len - 1;
    c[i] = Res->WWList.Word[i].uword[0];
    if (wlen[i] > maxwlen) maxwlen = wlen[i];
  }

  oicap = 2 * size;
  if ((oi = (int *)malloc(oicap * sizeof(int))) == NULL) goto done;
  oi[0]=0;

  DocSize = UdmVarListFindInt(&Doc->Sections, "Content-Length", UDM_MAXDOCSIZE);
  if (DocSize == 0) goto done;
  if ((HEnd = HDoc = (char *)malloc(DocSize)) == NULL) goto done;
  HDoc[0]='\0';

  if ((uni = (int *)malloc((DocSize + 10) * sizeof(int))) == NULL) goto done;

  UdmHTMLTOKInit(&tag);

  rec_id = UdmVarListFindInt(&Doc->Sections, "URL_ID", 0);
  dbnum = ((size_t)rec_id) % query->Conf->dbl.nitems;

  if ((tag.socket = s = query->Conf->dbl.db[dbnum].stored_sd) <= 0)  {
    /* No stored connection: fall back to the body kept in the sections. */
    Source = (char*)strdup(UdmVarListFindStr(&Doc->Sections, "body", ""));
    if (Source == NULL) goto done;
    htok = UdmHTMLToken(Source, &last, &tag);
  } else {
    tag.next_b = &UdmNextCharB_stored;
    tag.next_e = &UdmNextCharE_stored;
    tag.chunks = 1;

    UdmSend(s, hello, 1, 0);
    UdmSend(s, &rec_id, sizeof(rec_id), 0);
    /* First answer: zero means the document is not in the store. */
    if ((UdmRecvall(s, &ChunkSize, sizeof(ChunkSize)) < 0) || (ChunkSize == 0)) goto done;

    /* From here on stored waits for chunk numbers until it receives a zero. */
    stream_open = 1;
    ChunkSize = 0;
    UdmSend(s, &tag.chunks, sizeof(tag.chunks), 0);
    if ((UdmRecvall(s, &ChunkSize, sizeof(ChunkSize)) < 0) || (ChunkSize == 0)) goto done;

    if ((tag.Content = (char*)malloc(ChunkSize+10)) == NULL) goto done;
    content_owned = 1;
    UdmRecvall(s, tag.Content, ChunkSize);
    tag.Content[ChunkSize] = '\0';

    htok = UdmHTMLToken(tag.Content, &last, &tag);
  }

  /* Collect text until the first non-empty piece has been found. */
  for (len = 0; (len == 0) && htok; ) {
    if ((tag.type == UDM_HTML_TXT) && tag.script == 0 && tag.comment == 0 && tag.style == 0) {
      UdmExcerptAddText(HDoc, DocSize, &HEnd, htok, last);
      len = HEnd - HDoc;
    }
    htok = UdmHTMLToken(NULL, &last, &tag);
  }

  if (HEnd == HDoc) goto done;   /* the document has no text at all */

  UdmConv(&dc_uni, (char*)uni, sizeof(*uni)*(DocSize+10), HDoc, len + 1);
  prevlen = len;
  ulen = UdmUniLen(uni);

  for (p = prevend = uni; UdmUniLen(oi) < size; ) {

    /* No hit in what has been converted so far: pull in more text. */
    while((np  = UdmUniStrWWL(p, &(Res->WWList), c, wlen)) == NULL) {

      while(htok && (len == prevlen)) {
        if ((tag.type == UDM_HTML_TXT) && tag.script == 0 && tag.comment == 0 && tag.style == 0) {
          UdmExcerptAddText(HDoc, DocSize, &HEnd, htok, last);
          len = HEnd - HDoc;
        }
        htok = UdmHTMLToken(NULL, &last, &tag);
      }

      if (len == prevlen) break;   /* nothing more to read */

      UdmConv(&dc_uni, (char*)(uni + ulen), sizeof(*uni)*(DocSize + 10 - ulen), HDoc + prevlen, len - prevlen + 1);
      prevlen = len;
      /* Step back by the longest word so a word split across pieces is found. */
      p = (ulen < maxwlen) ? uni : (uni + (ulen - maxwlen));
      ulen += UdmUniLen(uni+ulen);

    }
    if (np == NULL) break;
    p = np;
    /* Accept the hit only if it starts at a word boundary. */
    if ( ( (p > uni) && ( (!UdmUniNSpace(*(p-1))) || ( (*(p-1)) < 0x30) ) ) || (p == uni)  ) {
      start = udm_max(udm_max(p - 64, uni), prevend);
      end = udm_min(p + 64, uni + ulen);
      /* Trim both ends to the nearest separator so words are not cut in half. */
      while(UdmUniNSpace(*start) && (start < p) && (*start > 0x2F)) start++;
      while(UdmUniNSpace(*end) && (p < end) && (*end > 0x2F)) end--;
      /* The previous excerpt may already reach beyond this window. */
      if (start > end) start = end;
      if (start != uni) UdmExcerptUniCat(oi, oicap, dot);
      ures = *end; *end = 0; UdmExcerptUniCat(oi, oicap, start); *end = ures;
      if ((end != uni + ulen) && (start != prevend)) UdmExcerptUniCat(oi, oicap, dot);
      p = prevend = end;
    }
    if (*p) p++;
  }

  if ((os = (char *)malloc(size * 12 * sizeof(char))) == NULL) goto done;

  UdmConv(&uni_lc, os, sizeof(*os) * size * 12 - 1, (char*)oi, sizeof(*oi) * (UdmUniLen(oi) + 1));
  os[size * 12 - 1]='\0';

  {
    register char *cc;
    for (cc = os; *cc != '\0'; cc++) {
      if ((*cc == '\n') || (*cc == '\r') || (*cc == '\t')) {
        *cc = ' ';
      }
    }
  }

done:
  /* Close the chunk dialogue unless the last chunk has already ended it. */
  if (stream_open && !tag.finished) {
    tag.chunks = 0;
    UdmSend(s, &tag.chunks, sizeof(tag.chunks), 0);
  }
  /* tag.Content may have been moved by realloc() in the chunk loader, so it
     is the pointer that has to be released, not the value it started with. */
  if (content_owned) {
    free(tag.Content);
    tag.Content = NULL;
  }
  free(Source);
  free(c);
  free(wlen);
  free(oi);
  free(HDoc);
  free(uni);
  return os;
}



/** Open the pair of files (data ".s" and index ".i") that hold P->rec_id
    and position on its index entry.
    The file number is (rec_id & UDM_STORE_BITS) modulo the "StoredFiles"
    setting. Missing files are created: a new index gets its signature and
    UDM_HASH_PRIME empty items, a new data file gets P->BASE_SIG. Both files
    are locked according to `mode` (UDM_READ_LOCK or UDM_WRITE_LOCK) and
    their signatures are verified. Finally the hash chain for rec_id is
    followed; on return P->Item holds the matching item or, if rec_id is not
    present, the last item of the chain, with P->CurrentItemPos set to its
    offset in the index file.
    On failure the files opened so far are left open and the file names
    stay allocated; the callers in this file call UdmCloseBase() afterwards.
    \return UDM_OK if successful, UDM_ERROR on error
 */
__C_LINK int __UDMCALL UdmOpenBase(UDM_AGENT *A, UDM_STORE_PARAM *P, int mode) {
  const char *vardir = UdmVarListFindStr(&A->Conf->Vars,"VarDir",UDM_VAR_DIR);
  int NFiles = UdmVarListFindInt(&A->Conf->Vars, "StoredFiles", 0x10000);
  unsigned int hash, FileNo;
  size_t filenamelen;
  UDM_STOREITEM  *hTable;
  char sig[UDM_SIG_LEN + 1];   /* one extra byte so it can always be terminated */

  /* A zero setting would make the modulo below divide by zero. */
  if (NFiles == 0) NFiles = 0x10000;
  FileNo = (P->rec_id & UDM_STORE_BITS) % NFiles;

  hash = UDM_HASH(P->rec_id);
  filenamelen = strlen(vardir) + strlen(P->subdir) + strlen(P->indname) + strlen(P->basename) +  48;
  if (
      ((P->Ifilename = (char *)UdmXmalloc(filenamelen)) == NULL) ||
      ((P->Sfilename = (char *)UdmXmalloc(filenamelen)) == NULL)
     ) {
    UdmLog(A, UDM_LOG_ERROR, "Memory alloc error 2x%d bytes", (int)filenamelen);
    return UDM_ERROR;
  }
  sprintf(P->Sfilename, "%s/%s/%s%04x.s", vardir, P->subdir, P->basename, FileNo);
  sprintf(P->Ifilename, "%s/%s/%s%04x.i", vardir, P->subdir, P->indname, FileNo);

  /* Index file: open it, or create it with an empty hash table. */
  if ((P->Ifd = fopen(P->Ifilename, "rb+")) == NULL) {
    if ((P->Ifd = fopen(P->Ifilename, "wb+")) == NULL) {
      UdmLog(A, UDM_LOG_ERROR, "Can't open/create file %s", P->Ifilename);
      return UDM_ERROR;
    }
    if ((hTable = (UDM_STOREITEM *)UdmXmalloc(sizeof(UDM_STOREITEM) * UDM_HASH_PRIME)) == NULL) {
      UdmLog(A, UDM_LOG_ERROR, "Memory alloc error hTable: %d bytes", (int)(sizeof(UDM_STOREITEM) * UDM_HASH_PRIME));
      return UDM_ERROR;
    }
    bzero((void*)hTable, sizeof(UDM_STOREITEM) * UDM_HASH_PRIME);
    if (
        (fwrite(UDM_STOREIND_SIG, UDM_SIG_LEN, 1, P->Ifd) != 1) ||
        (fwrite(hTable, sizeof(UDM_STOREITEM), UDM_HASH_PRIME, P->Ifd) != UDM_HASH_PRIME)
       ) {
      UdmLog(A, UDM_LOG_ERROR, "Can't set new index for file %s", P->Ifilename);
      UDM_FREE(hTable);
      return UDM_ERROR;
    }
    UDM_FREE(hTable);
    if (fseek(P->Ifd, 0, SEEK_SET)) {
      UdmLog(A, UDM_LOG_ERROR, "Can't seek for file %s", P->Ifilename);
      return UDM_ERROR;
    }
  }
  switch (mode) {
  case UDM_READ_LOCK:
    UdmReadLockFILE(P->Ifd);
    break;
  case UDM_WRITE_LOCK:
    UdmWriteLockFILE(P->Ifd);
    break;
  }

  /* Data file: open it, or create it with just the signature. */
  if ((P->Sfd = fopen(P->Sfilename, "rb+")) == NULL) {
    if ((P->Sfd = fopen(P->Sfilename, "wb+")) == NULL) {
      UdmLog(A, UDM_LOG_ERROR, "Can't open/create file %s", P->Sfilename);
      return UDM_ERROR;
    }
    if (fwrite(P->BASE_SIG, UDM_SIG_LEN, 1, P->Sfd) != 1) {
      UdmLog(A, UDM_LOG_ERROR, "Can't set signature for file %s", P->Sfilename);
      return UDM_ERROR;
    }
    if (fseek(P->Sfd, 0, SEEK_SET)) {
      UdmLog(A, UDM_LOG_ERROR, "Can't seek for file %s", P->Sfilename);
      return UDM_ERROR;
    }
  }
  switch(mode) {
  case UDM_READ_LOCK:
    UdmReadLockFILE(P->Sfd);
    break;
  case UDM_WRITE_LOCK:
    UdmWriteLockFILE(P->Sfd);
    break;
  }

  /* check store signature */
  if (fread(sig, UDM_SIG_LEN, 1, P->Sfd) != 1) {
    UdmLog(A, UDM_LOG_ERROR, "Can't read signature for file %s", P->Sfilename);
    return UDM_ERROR;
  }
  sig[UDM_SIG_LEN] = '\0';   /* keeps strcmp() and "%s" inside the buffer */
  if ( strcmp(sig, P->BASE_SIG) != 0 ) {
    UdmLog(A, UDM_LOG_ERROR, "Can't check signature %s for file %s", sig, P->Sfilename);
    return UDM_ERROR;
  }
  /* check index signature */
  if  (fread(sig, UDM_SIG_LEN, 1, P->Ifd) != 1) {
    UdmLog(A, UDM_LOG_ERROR, "Can't read signature for file %s", P->Ifilename);
    return UDM_ERROR;
  }
  sig[UDM_SIG_LEN] = '\0';
  if ( strcmp(sig, UDM_STOREIND_SIG) != 0 ) {
    UdmLog(A, UDM_LOG_ERROR, "Can't check signature for file %s", P->Ifilename);
    return UDM_ERROR;
  }

  /* search rec_id: start at the hash table slot ... */
  if (fseek(P->Ifd, (long)(UDM_SIG_LEN + hash * sizeof(UDM_STOREITEM)), SEEK_SET)) {
    UdmLog(A, UDM_LOG_ERROR, "Can't seeek for file %s", P->Ifilename);
    return UDM_ERROR;
  }
  P->CurrentItemPos = ftell(P->Ifd);
  if (fread(&P->Item, sizeof(UDM_STOREITEM), 1, P->Ifd) != 1) {
    UdmLog(A, UDM_LOG_ERROR, "{%s:%d} Can't read index for file %s seek:%ld hash: %u (%d)",
           __FILE__, __LINE__, P->Ifilename, (long)P->CurrentItemPos, hash, (int)hash);
    return UDM_ERROR;
  }

  /* ... then follow the chain until rec_id is found or the chain ends. */
  P->mishash = 0;
  while((P->Item.next != 0) && (P->Item.rec_id != P->rec_id)) {
    P->CurrentItemPos = P->Item.next;
    if (fseek(P->Ifd, P->CurrentItemPos, SEEK_SET)) {
      UdmLog(A, UDM_LOG_ERROR, "Can't seeek for file %s", P->Ifilename);
      return UDM_ERROR;
    }
    if (fread(&P->Item, sizeof(UDM_STOREITEM), 1, P->Ifd) != 1) {
      UdmLog(A, UDM_LOG_ERROR, "Can't read hash chain for file %s", P->Ifilename);
      return UDM_ERROR;
    }
    P->mishash = 1;
  }

  return UDM_OK;
}

/** Flush, unlock and close the data and index files of a store base and
    release its file names.
    Safe to call on a base whose UdmOpenBase() failed half way (any of the
    files may be NULL then) and safe to call twice: the handles and names
    are reset to NULL once released.
    \return UDM_OK
 */
__C_LINK int __UDMCALL UdmCloseBase(UDM_STORE_PARAM *P) {
  if (P->Sfd != NULL) fflush(P->Sfd);
  if (P->Ifd != NULL) fflush(P->Ifd);
  if (P->Sfd != NULL) UdmUnLockFILE(P->Sfd);
  if (P->Ifd != NULL) UdmUnLockFILE(P->Ifd);
  if (P->Sfd != NULL) {
    fclose(P->Sfd);
    P->Sfd = NULL;
  }
  if (P->Ifd != NULL) {
    fclose(P->Ifd);
    P->Ifd = NULL;
  }
  UDM_FREE(P->Ifilename);
  P->Ifilename = NULL;
  UDM_FREE(P->Sfilename);
  P->Sfilename = NULL;
  return UDM_OK;
}

/* Common exit for the stored request handlers below: releases the local
   buffers Doc and CDoc, closes the local store base P and returns x from
   the enclosing function. It expands to several statements and relies on
   locals named Doc, CDoc and P, so it must only be used inside a braced
   block of a function that declares them. */
#define ABORT(x)    UDM_FREE(Doc); \
                    UDM_FREE(CDoc); \
                    UdmCloseBase(&P); \
              return (x);

/* Remove the index entry for rec_id from the store.
   Agent  - agent used for logging and passed on to UdmOpenBase()
   ns     - client socket; when > 0 a zero size is sent to it if the base
            cannot be opened
   rec_id - id of the record to delete
   Client - client name used in log messages
   Returns UDM_OK when the record was deleted or was not present,
   UDM_ERROR on any open, seek, read or write failure. */
static int UdmStoreDeleteRec(UDM_AGENT *Agent, int ns, urlid_t rec_id, char *Client) {
  size_t DocSize = 0;
  UDM_STORE_PARAM P;
  long NextItemPos;

            bzero(&P, sizeof(P));
            P.subdir = "store";
            P.BASE_SIG = UDM_STORE_SIG;
            P.basename = "";
            P.indname = "";
            P.rec_id = rec_id;
            if (UdmOpenBase(Agent, &P, UDM_WRITE_LOCK) != UDM_OK) {
              if (ns > 0) UdmSend(ns, &DocSize, sizeof(DocSize), 0); 
                    UdmCloseBase(&P);
              return UDM_ERROR;;
            }

            /* UdmOpenBase() left P.Item / P.CurrentItemPos on the entry found for rec_id */
            if (P.Item.rec_id == rec_id) {
              /* Close the gap: every following item of the chain is copied one
                 slot towards the head, keeping the slots linked in the same order */
              NextItemPos = P.Item.next;
              while(NextItemPos != 0) {
                if (fseek(P.Ifd, NextItemPos, SEEK_SET)) {
               UdmCloseBase(&P);
               return UDM_ERROR;
                }
                if (fread(&P.Item, sizeof(UDM_STOREITEM), 1, P.Ifd) != 1) {
               UdmLog(Agent, UDM_LOG_ERROR, "Can't read hash chain for file %s", P.Ifilename);
               UdmCloseBase(&P);
               return UDM_ERROR;
                }
                /* go back to the slot being overwritten */
                if (fseek(P.Ifd, P.CurrentItemPos, SEEK_SET)) {
               UdmCloseBase(&P);
               return UDM_ERROR;
                }
                P.CurrentItemPos = NextItemPos;
                NextItemPos = P.Item.next;
                /* the moved item now links to the slot it was read from */
                P.Item.next = P.CurrentItemPos;
                if (fwrite(&P.Item, sizeof(UDM_STOREITEM), 1, P.Ifd) != 1) {
               UdmLog(Agent, UDM_LOG_ERROR, "Can't write hash chain for file %s", P.Ifilename);
               UdmCloseBase(&P);
               return UDM_ERROR;
                }
              }
              /* the last slot of the chain is now spare: blank it out */
              P.Item.rec_id = 0;
              P.Item.offset = 0;
              P.Item.next = 0;
              if (fseek(P.Ifd, P.CurrentItemPos, SEEK_SET)) {
                UdmCloseBase(&P);
                return UDM_ERROR;
              }
              if (fwrite(&P.Item, sizeof(UDM_STOREITEM), 1, P.Ifd) != 1) {
                UdmLog(Agent, UDM_LOG_ERROR, "Can't write hash chain for file %s", P.Ifilename);
                UdmCloseBase(&P);
                return UDM_ERROR;
              }
              UdmLog(Agent, UDM_LOG_EXTRA, "[%s] Deleted rec_id: %x", Client, rec_id);
            } else {
              UdmLog(Agent, UDM_LOG_EXTRA, "[%s] rec_id: %x not found for delete", Client, rec_id);
            }
            UdmCloseBase(&P);
            return UDM_OK;

}

/* Handle a delete request: read the record id from the client socket ns
   and remove that record from the store.
   Returns UDM_ERROR if the id cannot be read, otherwise the result of
   UdmStoreDeleteRec(). */
int UdmStoreDelete(UDM_AGENT *Agent, int ns, char *Client) {
  urlid_t target;
  int status = UDM_ERROR;

  if (UdmRecvall(ns, &target, sizeof(target)) >= 0) {
    status = UdmStoreDeleteRec(Agent, ns, target, Client);
  }
  return status;
}

/* Check-up of the store: delete every stored record whose id is not the
   hash of a URL known to any configured SQL database.
   A helper table "storedchk" (rec_id, url_id = hash of the URL) is rebuilt
   in each database, then every store index file is scanned and the records
   without a matching url_id are removed with UdmStoreDeleteRec(). The
   helper tables are dropped at the end.
   ns is the client socket (a zero size is sent to it if a store file
   cannot be opened); Client is not used here.
   Returns UDM_OK on success, or the failing call's status otherwise.
   Without SQL support nothing is done and UDM_OK is returned. */
int UdmStoredCheck(UDM_AGENT *Agent, int ns, char *Client) {

#ifdef HAVE_SQL
  UDM_ENV *Conf = Agent->Conf;
  size_t DocSize = 0;
  UDM_STORE_PARAM P;
  unsigned int i, NFiles = UdmVarListFindInt(&Conf->Vars, "StoredFiles", 0x10000);
  urlid_t *todel = (urlid_t*)malloc(128 * sizeof(urlid_t));
  urlid_t *grown;
  size_t ndel = 0, mdel = 128, totaldel = 0;
  char req[128];
  UDM_SQLRES   SQLRes;
  int res = UDM_OK, notfound;
  int base_open = 0;   /* P currently has to be closed on exit */
  size_t z, dbfrom = 0, dbto =  Agent->Conf->dbl.nitems;
  UDM_DB  *db;

  if (todel == NULL) return UDM_ERROR;
  if (NFiles > UDM_STORE_BITS) NFiles = UDM_STORE_BITS + 1;

  UdmLog(Agent, UDM_LOG_EXTRA, "create storedchk table(s)");
  for (z = dbfrom; z < dbto; z++) {
    db = &Agent->Conf->dbl.db[z];

    if(UDM_OK != (res = UdmSQLQuery(db, NULL, "DROP TABLE storedchk")))
      goto done;
    if(UDM_OK != (res = UdmSQLQuery(db, NULL, "CREATE TABLE storedchk (rec_id integer, url_id integer)")))
      goto done;
    if(UDM_OK != (res = UdmSQLQuery(db, &SQLRes, "SELECT rec_id, url FROM url")))
      goto done;
    for( i = 0; i < UdmSQLNumRows(&SQLRes); i++) {
      udm_snprintf(req, sizeof(req), "INSERT INTO storedchk (rec_id, url_id) VALUES (%s, %d)",
           UdmSQLValue(&SQLRes, i, 0), UdmStrHash32(UdmSQLValue(&SQLRes,i,1)));
      if(UDM_OK != (res = UdmSQLQuery(db, NULL, req))) {
        UdmSQLFree(&SQLRes);
        goto done;
      }
    }
    UdmSQLFree(&SQLRes);
    if(UDM_OK != (res = UdmSQLQuery(db, NULL, "CREATE INDEX storedchk_i ON storedchk (url_id)")))
      goto done;
  }

  for (i = 0; i < NFiles; i++) {
    /* Start every file from a clean parameter block so that nothing stale
       from the previous, already closed, file can be closed again. */
    bzero(&P, sizeof(P));
    P.subdir = "store";
    P.BASE_SIG = UDM_STORE_SIG;
    P.basename = "";
    P.indname = "";
    P.rec_id = i;

    base_open = 1;
    if (UdmOpenBase(Agent, &P, UDM_WRITE_LOCK) != UDM_OK) {
      UdmSend(ns, &DocSize, sizeof(DocSize), 0);
      res = UDM_ERROR;
      goto done;
    }
    if (fseek(P.Ifd, UDM_SIG_LEN, SEEK_SET)) {
      UdmLog(Agent, UDM_LOG_ERROR, "Can't seeek for file %s", P.Ifilename);
      res = UDM_ERROR;
      goto done;
    }
    /* Pass 1: collect the ids of the lost records of this file. */
    while (fread(&P.Item, sizeof(UDM_STOREITEM), 1, P.Ifd) == 1) {
      if (P.Item.rec_id == 0) continue;   /* empty slot */

      notfound = 1;
      for (z = dbfrom; notfound && (z < dbto); z++) {
        db = &Agent->Conf->dbl.db[z];

        udm_snprintf(req, sizeof(req), "SELECT rec_id FROM storedchk WHERE url_id=%d", P.Item.rec_id);
        if(UDM_OK != (res = UdmSQLQuery(db, &SQLRes, req)))
          goto done;
        if (UdmSQLNumRows(&SQLRes) > 0) {
          notfound = 0;
        }
        UdmSQLFree(&SQLRes);
      }
      if (notfound) {
        if (ndel >= mdel) {
          grown = (urlid_t*)realloc(todel, (mdel + 128) * sizeof(urlid_t));
          if (grown == NULL) {
            res = UDM_ERROR;
            goto done;
          }
          todel = grown;
          mdel += 128;
        }
        todel[ndel++] = P.Item.rec_id;
      }
    }
    UdmCloseBase(&P);
    base_open = 0;

    /* Pass 2: delete them; the base must be closed first because
       UdmStoreDeleteRec() opens and locks it itself. */
    for (z = 0; z < ndel; z++) {
      UdmLog(Agent, UDM_LOG_DEBUG, "Store %03X: deleting url_id: %X", i, todel[z]);
      if ((res = UdmStoreDeleteRec(Agent, -1, todel[z], "Stored Check-up")) != UDM_OK) {
        goto done;
      }
    }
    UdmLog(Agent, UDM_LOG_INFO, "Store %03X, %d lost records deleted", i, (int)ndel);
    totaldel += ndel;
    ndel = 0;
  }
  UdmLog(Agent, UDM_LOG_EXTRA, "Total lost record(s) deleted: %d\ndrop storedchk table(s)", (int)totaldel);
  for (z = dbfrom; z < dbto; z++) {
    db = &Agent->Conf->dbl.db[z];

    if(UDM_OK != (res = UdmSQLQuery(db, NULL, "DROP TABLE storedchk")))
      goto done;
  }
  res = UDM_OK;

done:
  if (base_open) {
    UdmCloseBase(&P);
  }
  free(todel);
  return res;
#else
  return UDM_OK;
#endif
}

/* Handle a series of "is this record stored?" requests.
   Record ids are read from the client socket ns one after another; for
   each of them an int (1 = found, 0 = not found) is sent back. A zero id
   ends the series.
   Returns UDM_OK when the series was ended by the client, UDM_ERROR if an
   id cannot be read or a store base cannot be opened (in the latter case a
   zero size_t is sent to the client first). */
int UdmStoreFind(UDM_AGENT *Agent, int ns, char *Client) {
  urlid_t rec_id;
  size_t DocSize = 0;
  UDM_STORE_PARAM P;
  int found = 0;

  if (UdmRecvall(ns, &rec_id, sizeof(rec_id)) < 0) {
    return UDM_ERROR;
  }
  while (rec_id != 0) {
    bzero(&P, sizeof(P));
    P.subdir = "store";
    P.BASE_SIG = UDM_STORE_SIG;
    P.basename = "";
    P.indname = "";
    P.rec_id = rec_id;
    if (UdmOpenBase(Agent, &P, UDM_READ_LOCK) != UDM_OK) {
      UdmSend(ns, &DocSize, sizeof(DocSize), 0);
      UdmCloseBase(&P);
      return UDM_ERROR;
    }
    if (P.Item.rec_id == rec_id) {
      found = 1;
      UdmLog(Agent, UDM_LOG_EXTRA, "[%s] Found rec_id: %x", Client, rec_id);
    } else {
      found = 0;
      UdmLog(Agent, UDM_LOG_EXTRA, "[%s] Not found rec_id: %x", Client, rec_id);
    }
    UdmCloseBase(&P);
    UdmSend(ns, &found, sizeof(found), 0);

    /* The base is already closed at this point, so a failed read must not
       close it a second time. */
    if (UdmRecvall(ns, &rec_id, sizeof(rec_id)) < 0) {
      return UDM_ERROR;
    }
  }
  return UDM_OK;
}

/* Serve a stored document to the client piece by piece.
   Dialogue on socket ns: the record id is read; a size_t of 1 is sent if
   the record exists and could be loaded, 0 otherwise. Then, for every
   non-zero int chunk number received, the next piece of the inflated
   document (at most UDM_DOCHUNKSIZE bytes) is sent as a size_t length
   followed by the data. A zero chunk number ends the dialogue.
   Returns UDM_OK when the dialogue ended normally or the record does not
   exist, UDM_ERROR on failures (always UDM_ERROR without zlib). */
int UdmStoreGetByChunks(UDM_AGENT *Agent, int ns, char *Client) {

#ifdef HAVE_ZLIB
  urlid_t rec_id;
  size_t DocSize = 0;
  Byte *Doc = NULL, *CDoc = NULL;
  z_stream zstream;
  UDM_STORE_PARAM P;
  int chunk;
  size_t OldOut, room;

  if (UdmRecvall(ns, &rec_id, sizeof(rec_id)) < 0) {
    return UDM_ERROR;
  }

  UdmLog(Agent, UDM_LOG_EXTRA, "[%s] Retrieve by chunks: rec_id: %x", Client, rec_id);

  bzero(&P, sizeof(P));
  P.subdir = "store";
  P.BASE_SIG = UDM_STORE_SIG;
  P.basename = "";
  P.indname = "";
  P.rec_id = rec_id;
  if (UdmOpenBase(Agent, &P, UDM_READ_LOCK) != UDM_OK) {
    UdmLog(Agent, UDM_LOG_ERROR, "[%s] UdmOpenBase error: rec_id: %x", Client, P.rec_id);
    UdmSend(ns, &DocSize, sizeof(DocSize), 0);
    ABORT(UDM_ERROR);
  }

  if (P.Item.rec_id != rec_id) {
    DocSize = 0;
    UdmSend(ns, &DocSize, sizeof(DocSize), 0);
    UdmLog(Agent, UDM_LOG_EXTRA, "[%s] Not found rec_id: %x", Client, rec_id);
    ABORT(UDM_OK);
  }

  if (fseek(P.Sfd, P.Item.offset, SEEK_SET)) {
    DocSize = 0;
    UdmSend(ns, &DocSize, sizeof(DocSize), 0);
    ABORT(UDM_ERROR);
  }
  zstream.avail_in = DocSize = P.Item.size;
  zstream.avail_out = 0;
  zstream.zalloc = Z_NULL;
  zstream.zfree = Z_NULL;
  zstream.opaque = Z_NULL;
  CDoc = zstream.next_in = (DocSize) ? (Byte *) UdmXmalloc(DocSize) : NULL;
  Doc = zstream.next_out = (Byte *) UdmXmalloc(UDM_MAXDOCSIZE);
  if (CDoc == NULL || Doc == NULL) {
    DocSize = 0;
    UdmSend(ns, &DocSize, sizeof(DocSize), 0);
    ABORT(UDM_ERROR);
  }

  if ((fread(CDoc, DocSize, 1, P.Sfd) != 1)
      || (inflateInit2(&zstream, 15) != Z_OK)) {
    DocSize = 0;
    UdmSend(ns, &DocSize, sizeof(DocSize), 0);
    ABORT(UDM_ERROR);
  }

  OldOut = 0;
  DocSize = 1;   /* "record found", the client may start asking for chunks */
  UdmSend(ns, &DocSize, sizeof(DocSize), 0);
  for (;;) {
    if (UdmRecvall(ns, &chunk, sizeof(chunk)) < 0) {
      DocSize = 0;
      UdmSend(ns, &DocSize, sizeof(DocSize), 0);
      inflateEnd(&zstream);   /* release the decompressor state as well */
      ABORT(UDM_ERROR);
    }
    if (chunk == 0) break;

    /* Never let inflate() write past the UDM_MAXDOCSIZE bytes of Doc. */
    room = (zstream.total_out < (size_t)UDM_MAXDOCSIZE) ? (size_t)UDM_MAXDOCSIZE - zstream.total_out : 0;
    zstream.avail_out = (room < (size_t)UDM_DOCHUNKSIZE) ? (uInt)room : (uInt)UDM_DOCHUNKSIZE;
    inflate(&zstream, Z_SYNC_FLUSH);

    DocSize = zstream.total_out - OldOut;
    UdmSend(ns, &DocSize, sizeof(DocSize), 0);
    UdmSend(ns, &Doc[OldOut], DocSize, 0);
    UdmLog(Agent, UDM_LOG_EXTRA, "[%s] rec_id: %x Chunk %i [%d bytes] sent", Client, rec_id, chunk, (int)DocSize);
    OldOut = zstream.total_out;
  }
  inflateEnd(&zstream);

  UdmLog(Agent, UDM_LOG_EXTRA, "[%s] Retrieved by chunks rec_id: %x Size: %d Ratio: %5.2f%%", Client,
         rec_id, (int)zstream.total_out, 100.0 * zstream.total_in / zstream.total_out);
  ABORT(UDM_OK);

/**********************/
#else
  return UDM_ERROR;
#endif

}

/** Send the stored copy of a document to a client in a single piece.

    Reads a rec_id from socket ns, looks it up in the "store" base, inflates
    the stored record and replies with the uncompressed size (a size_t)
    followed by that many bytes.  Every failure after the request has been
    read, as well as a missing record, is answered with a size of 0.

    NOTE(review): ABORT() is a macro defined elsewhere in this file; it is
    assumed here to release Doc/CDoc, close P and return its argument --
    confirm against its definition.

    \param Agent  agent passed to UdmOpenBase() and UdmLog()
    \param ns     socket the request is read from and the reply is written to
    \param Client client label, used only in log messages
    \return UDM_ERROR if the request cannot be read, the base cannot be
            opened, or seeking, allocating, reading or inflateInit2() fails;
            UDM_OK otherwise, including when the record is not found.
            Always UDM_ERROR when built without zlib.
 */
int UdmStoreGet(UDM_AGENT *Agent, int ns, char *Client) {

#ifdef HAVE_ZLIB
  urlid_t rec_id;
  size_t DocSize = 0;  /* 0 is the "nothing to send" reply; never send it uninitialized */
  Byte *Doc = NULL, *CDoc = NULL;
  z_stream zstream;
  UDM_STORE_PARAM P;

  if (UdmRecvall(ns, &rec_id, sizeof(rec_id)) < 0) {
    return UDM_ERROR;
  }

  UdmLog(Agent, UDM_LOG_EXTRA, "[%s] Retrieve rec_id: %x", Client, rec_id);
  bzero(&P, sizeof(P));
  P.subdir = "store";
  P.BASE_SIG = UDM_STORE_SIG;
  P.basename = "";
  P.indname = "";
  P.rec_id = rec_id;
  if (UdmOpenBase(Agent, &P, UDM_READ_LOCK) != UDM_OK) {
    DocSize = 0;
    UdmSend(ns, &DocSize, sizeof(DocSize), 0);
    ABORT(UDM_ERROR);
  }

  if (P.Item.rec_id == rec_id) {
    if (fseek(P.Sfd, P.Item.offset, SEEK_SET)) {
      DocSize = 0;
      UdmSend(ns, &DocSize, sizeof(DocSize), 0);
      ABORT(UDM_ERROR);
    }

    /* CDoc holds the compressed record, Doc receives the inflated text. */
    zstream.avail_in = DocSize = P.Item.size;
    zstream.avail_out = UDM_MAXDOCSIZE;
    CDoc = zstream.next_in = (Byte *) UdmXmalloc(DocSize);
    Doc = zstream.next_out = (Byte *) UdmXmalloc(UDM_MAXDOCSIZE);
    if (CDoc == NULL || Doc == NULL) {
      DocSize = 0;
      UdmSend(ns, &DocSize, sizeof(DocSize), 0);
      ABORT(UDM_ERROR);
    }
    zstream.zalloc = Z_NULL;
    zstream.zfree = Z_NULL;
    zstream.opaque = Z_NULL;
    if ((fread(CDoc, DocSize, 1, P.Sfd) != 1)
     || (inflateInit2(&zstream, 15) != Z_OK)) {
      DocSize = 0;
      UdmSend(ns, &DocSize, sizeof(DocSize), 0);
      ABORT(UDM_ERROR);
    }

    /* The result of inflate() is deliberately not checked: whatever was
       produced (at most UDM_MAXDOCSIZE bytes) is sent to the client. */
    inflate(&zstream, Z_FINISH);
    inflateEnd(&zstream);
    DocSize = zstream.total_out;
    UdmSend(ns, &DocSize, sizeof(DocSize), 0);
    UdmSend(ns, Doc, DocSize, 0);

  } else {
    DocSize = 0;
    UdmSend(ns, &DocSize, sizeof(DocSize), 0);
    UdmLog(Agent, UDM_LOG_ERROR, "[%s] Not found rec_id: %x\n", Client, rec_id);
    ABORT(UDM_OK);
  }

  /* DocSize is a size_t; cast so the argument matches the %d conversion. */
  UdmLog(Agent, UDM_LOG_EXTRA, "[%s] Retrieved rec_id: %x Size: %d Ratio: %5.2f%%", Client,
         rec_id, (int) DocSize, 100.0 * zstream.total_in / DocSize);
  ABORT(UDM_OK);
#else
  return UDM_ERROR;
#endif
}

/** Receive a document from a client and store a compressed copy of it.

    Reads a rec_id, a size_t document length and then the document body from
    socket ns, deflates the body and writes it into the "store" base.  An
    existing record is overwritten in place when the new compressed data is
    not larger than the stored one; otherwise the data is appended to the
    data file.  The index item is rewritten afterwards.

    NOTE(review): ABORT() is a macro defined elsewhere in this file; it is
    assumed here to release Doc/CDoc, close P and return its argument --
    confirm against its definition.  The failure paths inside the store
    section return ABORT(0), exactly as before.

    \param Agent  agent passed to UdmOpenBase() and UdmLog()
    \param ns     socket the request is read from
    \param Client client label, used only in log messages
    \return UDM_ERROR if the request cannot be read, memory cannot be
            allocated or compression fails; 0 if opening, seeking in or
            writing to the base fails; UDM_OK once the data has been written.
            Always UDM_ERROR when built without zlib.
 */
int UdmStoreSave(UDM_AGENT *Agent, int ns, char *Client) {

#ifdef HAVE_ZLIB
  urlid_t rec_id;
  size_t DocSize;
  Byte *Doc = NULL, *CDoc = NULL;
  z_stream zstream;
  UDM_STORE_PARAM P;
  long NewItemPos;
  uLong CDocBound;
  int zrc;

  if (UdmRecvall(ns, &rec_id, sizeof(rec_id)) < 0) {
    return UDM_ERROR;
  }

  if (UdmRecvall(ns, &DocSize, sizeof(DocSize)) < 0) {
    return UDM_ERROR;
  }
  Doc = (Byte *) malloc(DocSize);
  if (Doc == NULL) {
    return UDM_ERROR;
  }
  if (UdmRecvall(ns, Doc, DocSize) < 0) {
    free(Doc);  /* do not leak the receive buffer on a short read */
    return UDM_ERROR;
  }

  /* Zero P before any ABORT() can be reached, so that the macro never sees
     an uninitialized structure when compression fails before the base is
     opened. */
  bzero(&P, sizeof(P));

  zstream.zalloc = Z_NULL;
  zstream.zfree = Z_NULL;
  zstream.opaque = Z_NULL;
  zstream.next_in = Doc;

  if (deflateInit2(&zstream, 9, Z_DEFLATED, 15, 9, Z_DEFAULT_STRATEGY) == Z_OK) {

    /* deflateBound() gives the worst-case output size; a fixed 2 * DocSize
       is too small for very short documents because of the zlib header and
       trailer. */
    CDocBound = deflateBound(&zstream, DocSize);
    zstream.avail_in = DocSize;
    zstream.avail_out = CDocBound;
    CDoc = zstream.next_out = (Byte *) UdmXmalloc(CDocBound);
    if (CDoc == NULL) {
      deflateEnd(&zstream);
      free(Doc);
      return UDM_ERROR;
    }
    zrc = deflate(&zstream, Z_FINISH);
    deflateEnd(&zstream);
    if (zrc != Z_STREAM_END) {
      /* Never store an incomplete compressed stream. */
      ABORT(UDM_ERROR);
    }

/* store operations */

    P.subdir = "store";
    P.BASE_SIG = UDM_STORE_SIG;
    P.basename = "";
    P.indname = "";
    P.rec_id = rec_id;
    if (UdmOpenBase(Agent, &P, UDM_WRITE_LOCK) != UDM_OK) {
      /* NOTE(review): this sends the received DocSize back to the client;
         kept as in the original -- confirm the client expects a reply here. */
      UdmSend(ns, &DocSize, sizeof(DocSize), 0);
      ABORT(0);
    }

    if (P.Item.rec_id == rec_id) {
      /* Existing record: reuse its slot only if the new data fits. */
      if (P.Item.size < zstream.total_out) {
        if (fseek(P.Sfd, 0, SEEK_END)) {
          ABORT(0);
        }
        P.Item.offset = ftell(P.Sfd);
      } else {
        if (fseek(P.Sfd, P.Item.offset, SEEK_SET)) {
          ABORT(0);
        }
      }
    } else { /* new rec_id added */
      if (P.mishash && P.Item.rec_id != 0) {
        /* Link the current item to a new item appended at the end of the
           index file, then continue with that new position. */
        if (fseek(P.Ifd, 0, SEEK_END)) {
          ABORT(0);
        }
        P.Item.next = NewItemPos = ftell(P.Ifd);
        if (fseek(P.Ifd, P.CurrentItemPos, SEEK_SET)) {
          ABORT(0);
        }
        if (fwrite(&P.Item, sizeof(UDM_STOREITEM), 1, P.Ifd) != 1) {
          ABORT(0);
        }
        P.CurrentItemPos = NewItemPos;
      }
      P.Item.rec_id = rec_id;
      P.Item.next = 0;
      if (fseek(P.Sfd, 0, SEEK_END)) {
        ABORT(0);
      }
      P.Item.offset = ftell(P.Sfd);
    }

    P.Item.size = zstream.total_out;
    if (fwrite(CDoc, zstream.total_out, 1, P.Sfd) != 1) {
      ABORT(0);
    }
    if (fseek(P.Ifd, P.CurrentItemPos, SEEK_SET)) {
      ABORT(0);
    }
    if (fwrite(&P.Item, sizeof(UDM_STOREITEM), 1, P.Ifd) != 1) {
      UdmLog(Agent, UDM_LOG_ERROR, "Can't write index for file %s", P.Ifilename);
    }
    /* DocSize is a size_t; cast so the argument matches the %d conversion. */
    UdmLog(Agent, UDM_LOG_EXTRA, "[%s] Stored rec_id: %x Size: %d Ratio: %5.2f%%", Client,
           rec_id, (int) DocSize, 100.0 * zstream.total_out / DocSize);

    ABORT(UDM_OK);
/* /store operations */
  }

  /* deflateInit2() failed. */
  ABORT(UDM_ERROR);
#else
  return UDM_ERROR;
#endif

}
