Author: Laszlo Kajan <lkajan@rostlab.org>
Author: Elso Andras <elso.andras@gmail.com>
Subject: 40000000 bug and basename
 [Laszlo Kajan]
 The default value 40000000 for FFINDEX_MAX_INDEX_ENTRIES_DEFAULT is too high for a machine with 2G (or less) of RAM:
 at ~72 bytes per entry the default index allocation exceeds 2G and malloc fails.
 Fixed by introducing the environment variable FFINDEX_MAX_INDEX_ENTRIES, which overrides the default at run time.
 [Elso Andras]
 Print basename(argv[0]) instead of the full path in the usage messages; this helps packagers who generate man pages with help2man.
Forwarded: yes

--- a/src/ffindex_apply.c
+++ b/src/ffindex_apply.c
@@ -34,8 +34,10 @@
   if(argn < 4)
   {
     fprintf(stderr, "USAGE: %s DATA_FILENAME INDEX_FILENAME PROGRAM [PROGRAM_ARGS]*\n"
+                    "ENVIRONMENT\n"
+                    "\tFFINDEX_MAX_INDEX_ENTRIES - allocate memory for this number of entries\n"
                     "\nDesigned and implemented by Andy Hauser <hauser@genzentrum.lmu.de>.\n",
-                    argv[0]);
+                    basename(argv[0]));
     return -1;
   }
   char *data_filename  = argv[1];
--- a/src/ffindex_apply_mpi.c
+++ b/src/ffindex_apply_mpi.c
@@ -195,7 +195,7 @@
                     "\t-m\tDo not merge the FFindex parts generated by the different MPI processes\n"
                     "\t\tThis is useful for large MPI Jobs where merge time might be accounted.\n"
                     "\nDesigned and implemented by Andy Hauser <hauser@genzentrum.lmu.de>.\n",
-                    argv[0]);
+                    basename(argv[0]));
     return -1;
   }
   read_buffer = malloc(400 * 1024 * 1024);
--- a/src/ffindex_build.c
+++ b/src/ffindex_build.c
@@ -40,6 +40,8 @@
                     "\t-s\t\tsort index file, so that the index can queried.\n"
                     "\t\t\tAnother append operations can be done without sorting.\n"
                     "\t-v\t\tprint version and other info then exit\n"
+                    "ENVIRONMENT\n"
+                    "\tFFINDEX_MAX_INDEX_ENTRIES - allocate memory for this number of entries\n"
                     "\nEXAMPLES:\n"
                     "\tCreate a new ffindex containing all files from the \"bar/\" directory containing\n"
                     "\tsay myfile1.txt, myfile2.txt and sort (-s) it so that e.g. ffindex_get can use it.\n"
@@ -52,7 +54,7 @@
                     "\tMaximum key/filename length is %d and maximum entries are by default %d\n"
                     "\tThis can be changed in the sources.\n"
                     "\nDesigned and implemented by Andreas W. Hauser <hauser@genzentrum.lmu.de>.\n",
-                    program_name, MAX_FILENAME_LIST_FILES, FFINDEX_MAX_ENTRY_NAME_LENTH, FFINDEX_MAX_INDEX_ENTRIES_DEFAULT);
+                    basename(program_name), MAX_FILENAME_LIST_FILES, FFINDEX_MAX_ENTRY_NAME_LENTH, FFINDEX_MAX_INDEX_ENTRIES_DEFAULT);
 }
 
 int main(int argn, char **argv)
--- a/src/ffindex_from_fasta.c
+++ b/src/ffindex_from_fasta.c
@@ -33,7 +33,9 @@
 {
     fprintf(stderr, "USAGE: %s -v | [-s] data_filename index_filename fasta_filename\n"
                     "\t-s\tsort index file\n"
-                    "\nDesigned and implemented by Andreas W. Hauser <hauser@genzentrum.lmu.de>.\n", program_name);
+                    "ENVIRONMENT\n"
+                    "\tFFINDEX_MAX_INDEX_ENTRIES - allocate memory for this number of entries\n"
+                    "\nDesigned and implemented by Andreas W. Hauser <hauser@genzentrum.lmu.de>.\n", basename(program_name));
 }
 
 int main(int argn, char **argv)
@@ -127,4 +129,4 @@
   return err;
 }
 
-/* vim: ts=2 sw=2 et: */
+// vim: ts=2 sw=2 et:
--- a/src/ffindex_get.c
+++ b/src/ffindex_get.c
@@ -26,8 +26,10 @@
 {
     fprintf(stderr, "USAGE: %s data_filename index_filename entry name(s)\n"
                     "-n\tuse index of entry instead of entry name\n"
+                    "ENVIRONMENT\n"
+                    "\tFFINDEX_MAX_INDEX_ENTRIES - allocate memory for this number of entries\n"
                     "\nDesigned and implemented by Andy Hauser <hauser@genzentrum.lmu.de>.\n",
-                    program_name);
+                    basename(program_name));
 }
 
 int main(int argn, char **argv)
--- a/src/ffindex_modify.c
+++ b/src/ffindex_modify.c
@@ -37,7 +37,7 @@
                     "\t-u\tunlink entry (remove from index only)\n"
                     "\t-v\tprint version and other info then exit\n"
                     "\nDesigned and implemented by Andreas W. Hauser <hauser@genzentrum.lmu.de>.\n",
-                    program_name, MAX_FILENAME_LIST_FILES);
+                    basename(program_name), MAX_FILENAME_LIST_FILES);
 }
 
 int main(int argn, char **argv)
@@ -124,7 +124,7 @@
     }
     else
     {
-      char** sorted_names_to_unlink = malloc(FFINDEX_MAX_INDEX_ENTRIES_DEFAULT * sizeof(char *));
+      char** sorted_names_to_unlink = malloc(ffindex_max_index_entries() * sizeof(char *));
       if(sorted_names_to_unlink == NULL)
         fferror_print(__FILE__, __LINE__, __func__, "malloc failed");
       /* For each list_file unlink all entries */
--- a/src/ffindex_unpack.c
+++ b/src/ffindex_unpack.c
@@ -35,7 +35,7 @@
   {
     fprintf(stderr, "USAGE: %s DATA_FILENAME INDEX_FILENAME OUT_DIR\n"
                     "\nDesigned and implemented by Andy Hauser <hauser@genzentrum.lmu.de>.\n",
-                    argv[0]);
+                    basename(argv[0]));
     return -1;
   }
   char *data_filename  = argv[1];
--- a/src/ffindex.c
+++ b/src/ffindex.c
@@ -227,7 +227,7 @@
 ffindex_index_t* ffindex_index_parse(FILE *index_file, size_t num_max_entries)
 {
   if(num_max_entries == 0)
-    num_max_entries = FFINDEX_MAX_INDEX_ENTRIES_DEFAULT;
+    num_max_entries = ffindex_max_index_entries();
   size_t nbytes = sizeof(ffindex_index_t) + (sizeof(ffindex_entry_t) * num_max_entries);
   ffindex_index_t *index = (ffindex_index_t *)malloc(nbytes);
   if(index == NULL)
--- a/src/ffindex.h
+++ b/src/ffindex.h
@@ -22,6 +22,8 @@
 #include <stdio.h>
 
 #define FFINDEX_VERSION 0.980
+// lkajan: with ~72 bytes per entry, the below makes a default allocation over 2G. malloc in ffindex.c:225 fails on a machine with 'only' 2G of RAM.
+// lkajan: let the default remain 40000000, and let the actual value be controlled by the env var FFINDEX_MAX_INDEX_ENTRIES wherever FFINDEX_MAX_INDEX_ENTRIES_DEFAULT is used.
 #define FFINDEX_MAX_INDEX_ENTRIES_DEFAULT 40000000
 #define FFINDEX_MAX_ENTRY_NAME_LENTH 32
 
@@ -45,6 +47,32 @@
   ffindex_entry_t entries[]; /* This array is as big as the excess memory allocated for this struct. */
 } ffindex_index_t;
 
+/*
+ * Number of index entries to allocate memory for.
+ *
+ * Reads the environment variable FFINDEX_MAX_INDEX_ENTRIES and returns its
+ * value when it is a positive decimal integer; otherwise (unset, empty,
+ * non-numeric, trailing garbage, zero or negative) returns
+ * FFINDEX_MAX_INDEX_ENTRIES_DEFAULT.
+ *
+ * Declared static inline: a plain C99 'inline' definition in a header emits no
+ * external definition, which gives "undefined reference" link errors whenever
+ * the compiler does not inline a call (e.g. at -O0).
+ */
+static inline size_t ffindex_max_index_entries(void)
+{
+  const char *fmie = getenv("FFINDEX_MAX_INDEX_ENTRIES");
+  if(fmie != NULL)
+  {
+    char *end = NULL;
+    long requested = strtol(fmie, &end, 10);
+    /* A negative value would wrap to a huge size_t, and 0 would leave no room for any entry. */
+    if(end != fmie && *end == '\0' && requested > 0)
+      return (size_t)requested;
+  }
+  return FFINDEX_MAX_INDEX_ENTRIES_DEFAULT;
+}
+
 int ffindex_insert_memory(FILE *data_file, FILE *index_file, size_t *offset, char *from_start, size_t from_length, char *name);
 
 int ffindex_insert_file(FILE *data_file, FILE *index_file, size_t *offset, const char *path, char *name);
