aboutsummaryrefslogtreecommitdiffstats
path: root/examples
diff options
context:
space:
mode:
Diffstat (limited to 'examples')
-rw-r--r-- examples/CMakeLists.txt 11
-rw-r--r-- examples/bwt.c 220
-rw-r--r-- examples/mksary.c 193
-rw-r--r-- examples/sasearch.c 165
-rw-r--r-- examples/suftest.c 164
-rw-r--r-- examples/unbwt.c 207
6 files changed, 960 insertions, 0 deletions
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 000000000000..e801c81a4ea9
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,11 @@
+## Add definitions ##
+# Large-file feature macros, passed to the compiler for every example target.
+add_definitions(-D_LARGEFILE_SOURCE -D_LARGE_FILES -D_FILE_OFFSET_BITS=64)
+
+## Targets ##
+# Headers are looked up in both the source-tree and the build-tree include
+# directories, one level above this directory.
+include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include"
+ "${CMAKE_CURRENT_BINARY_DIR}/../include")
+# The divsufsort library is expected in the build tree's lib directory.
+link_directories("${CMAKE_CURRENT_BINARY_DIR}/../lib")
+# Each example is a single-source executable named after its .c file and
+# linked against divsufsort.
+foreach(src suftest mksary sasearch bwt unbwt)
+ add_executable(${src} ${src}.c)
+ target_link_libraries(${src} divsufsort)
+endforeach(src)
diff --git a/examples/bwt.c b/examples/bwt.c
new file mode 100644
index 000000000000..5a362d0179a0
--- /dev/null
+++ b/examples/bwt.c
@@ -0,0 +1,220 @@
+/*
+ * bwt.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_IO_H && HAVE_FCNTL_H
+# include <io.h>
+# include <fcntl.h>
+#endif
+#include <time.h>
+#include <divsufsort.h>
+#include "lfs.h"
+
+
+/*
+ * Writes the low 32 bits of n to fp as four bytes, least significant
+ * byte first.  Returns the item count reported by fwrite (4 on success).
+ */
+static
+size_t
+write_int(FILE *fp, saidx_t n) {
+  unsigned char bytes[4];
+  int k;
+
+  for(k = 0; k < 4; ++k) {
+    bytes[k] = (unsigned char)((n >> (8 * k)) & 0xff);
+  }
+  return fwrite(bytes, sizeof(unsigned char), 4, fp);
+}
+
+/*
+ * Prints the bwt banner (with the library version) and the usage text to
+ * stderr, then terminates the process with the given exit status.
+ */
+static
+void
+print_help(const char *progname, int status) {
+  FILE *out = stderr;
+  const char *version = divsufsort_version();
+
+  fprintf(out,
+          "bwt, a burrows-wheeler transform program, version %s.\n",
+          version);
+  fprintf(out, "usage: %s [-b num] INFILE OUTFILE\n", progname);
+  fprintf(out, " -b num set block size to num MiB [1..512] (default: 32)\n\n");
+  exit(status);
+}
+
+/*
+ * bwt: reads INFILE block by block, runs divbwt on each block and writes
+ * the result to OUTFILE.
+ *
+ * Output layout: the block size as a 4-byte little-endian integer, then,
+ * for every block, the index returned by divbwt (4 bytes, little-endian)
+ * followed by the transformed bytes of that block.
+ *
+ * "-" as INFILE/OUTFILE selects stdin/stdout.  Every failure prints a
+ * message to stderr and exits with EXIT_FAILURE; success returns 0.
+ */
+int
+main(int argc, const char *argv[]) {
+  FILE *fp, *ofp;
+  const char *fname, *ofname;
+  sauchar_t *T;
+  saidx_t *SA;
+  LFS_OFF_T n;
+  size_t m, nalloc;
+  saidx_t pidx;
+  clock_t start,finish;
+  saint_t i, blocksize = 32, needclose = 3;
+  long mib;
+
+  /* Check arguments. */
+  if((argc == 1) ||
+     (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
+  if((argc != 3) && (argc != 5)) { print_help(argv[0], EXIT_FAILURE); }
+  i = 1;
+  if(argc == 5) {
+    if(strcmp(argv[i], "-b") != 0) { print_help(argv[0], EXIT_FAILURE); }
+    /* strtol saturates on out-of-range input (atoi is undefined there);
+       clamp while still in long, then narrow.  A value of 0 is kept and
+       later means "use the input size as the block size". */
+    mib = strtol(argv[i + 1], NULL, 10);
+    if(mib < 0) { mib = 1; }
+    else if(512 < mib) { mib = 512; }
+    blocksize = (saint_t)mib;
+    i += 2;
+  }
+  blocksize <<= 20; /* MiB -> bytes */
+
+  /* Open a file for reading. */
+  if(strcmp(argv[i], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&fp, fname = argv[i], "rb") != 0) {
+#else
+    if((fp = LFS_FOPEN(fname = argv[i], "rb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    fp = stdin;
+    fname = "stdin";
+    needclose ^= 1; /* stdin is not ours to close */
+  }
+  i += 1;
+
+  /* Open a file for writing. */
+  if(strcmp(argv[i], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&ofp, ofname = argv[i], "wb") != 0) {
+#else
+    if((ofp = LFS_FOPEN(ofname = argv[i], "wb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    ofp = stdout;
+    ofname = "stdout";
+    needclose ^= 2; /* stdout is not ours to close */
+  }
+
+  /* Get the file size.  If the stream is not seekable the requested (or
+     default) block size is used unchanged. */
+  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
+    n = LFS_FTELL(fp);
+    rewind(fp);
+    if(n < 0) {
+      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+    /* Never use a block larger than 512 MiB or larger than the input. */
+    if(0x20000000L < n) { n = 0x20000000L; }
+    if((blocksize == 0) || (n < blocksize)) { blocksize = (saidx_t)n; }
+  } else if(blocksize == 0) { blocksize = 32 << 20; }
+
+  /* Allocate the text and suffix-array buffers (blocksize elements each).
+     At least one element is requested because malloc(0) may return NULL,
+     which would be misreported as an allocation failure for an empty
+     input. */
+  nalloc = (0 < blocksize) ? (size_t)blocksize : 1;
+  T = (sauchar_t *)malloc(nalloc * sizeof(sauchar_t));
+  SA = (saidx_t *)malloc(nalloc * sizeof(saidx_t));
+  if((T == NULL) || (SA == NULL)) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Write the blocksize. */
+  if(write_int(ofp, blocksize) != 4) {
+    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  fprintf(stderr, " BWT (blocksize %" PRIdSAINT_T ") ... ", blocksize);
+  start = clock();
+  /* n accumulates the total number of input bytes processed. */
+  for(n = 0; 0 < (m = fread(T, sizeof(sauchar_t), blocksize, fp)); n += m) {
+    /* Burrows-Wheeler Transform (in place: T is both input and output). */
+    pidx = divbwt(T, T, SA, m);
+    if(pidx < 0) {
+      fprintf(stderr, "%s (bw_transform): %s.\n",
+              argv[0],
+              (pidx == -1) ? "Invalid arguments" : "Cannot allocate memory");
+      exit(EXIT_FAILURE);
+    }
+
+    /* Write the bwted data. */
+    if((write_int(ofp, pidx) != 4) ||
+       (fwrite(T, sizeof(sauchar_t), m, ofp) != m)) {
+      fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  }
+  if(ferror(fp)) {
+    fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  finish = clock();
+  fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n",
+          n, (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+  /* Close files.  Buffered output is only known to have been written once
+     fclose (or fflush, for stdout) succeeds, so its result is checked. */
+  if(needclose & 1) { fclose(fp); }
+  if(((needclose & 2) ? fclose(ofp) : fflush(ofp)) != 0) {
+    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Deallocate memory. */
+  free(SA);
+  free(T);
+
+  return 0;
+}
diff --git a/examples/mksary.c b/examples/mksary.c
new file mode 100644
index 000000000000..b48177cf8e95
--- /dev/null
+++ b/examples/mksary.c
@@ -0,0 +1,193 @@
+/*
+ * mksary.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_IO_H && HAVE_FCNTL_H
+# include <io.h>
+# include <fcntl.h>
+#endif
+#include <time.h>
+#include <divsufsort.h>
+#include "lfs.h"
+
+
+/*
+ * Prints the mksary banner (with the library version) and the usage line
+ * to stderr, then terminates the process with the given exit status.
+ */
+static
+void
+print_help(const char *progname, int status) {
+  FILE *out = stderr;
+  const char *version = divsufsort_version();
+
+  fprintf(out,
+          "mksary, a simple suffix array builder, version %s.\n",
+          version);
+  fprintf(out, "usage: %s INFILE OUTFILE\n\n", progname);
+  exit(status);
+}
+
+/*
+ * mksary: reads all of INFILE into memory, builds its suffix array with
+ * divsufsort and writes the array to OUTFILE as raw saidx_t values.
+ *
+ * "-" as INFILE/OUTFILE selects stdin/stdout; the input must be seekable
+ * because its size is determined with fseek/ftell.  Every failure prints
+ * a message to stderr and exits with EXIT_FAILURE; success returns 0.
+ */
+int
+main(int argc, const char *argv[]) {
+  FILE *fp, *ofp;
+  const char *fname, *ofname;
+  sauchar_t *T;
+  saidx_t *SA;
+  LFS_OFF_T n;
+  size_t nalloc;
+  clock_t start, finish;
+  saint_t needclose = 3;
+
+  /* Check arguments. */
+  if((argc == 1) ||
+     (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
+  if(argc != 3) { print_help(argv[0], EXIT_FAILURE); }
+
+  /* Open a file for reading. */
+  if(strcmp(argv[1], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
+#else
+    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    fp = stdin;
+    fname = "stdin";
+    needclose ^= 1; /* stdin is not ours to close */
+  }
+
+  /* Open a file for writing. */
+  if(strcmp(argv[2], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) {
+#else
+    if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    ofp = stdout;
+    ofname = "stdout";
+    needclose ^= 2; /* stdout is not ours to close */
+  }
+
+  /* Get the file size. */
+  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
+    n = LFS_FTELL(fp);
+    rewind(fp);
+    if(n < 0) {
+      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+    /* n is later narrowed to saidx_t; refuse sizes that need more than
+       31 bits. */
+    if(0x7fffffff <= n) {
+      fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Allocate the text and suffix-array buffers (n elements each).  At
+     least one element is requested because malloc(0) may return NULL,
+     which would be misreported as an allocation failure for an empty
+     input. */
+  nalloc = (0 < n) ? (size_t)n : 1;
+  T = (sauchar_t *)malloc(nalloc * sizeof(sauchar_t));
+  SA = (saidx_t *)malloc(nalloc * sizeof(saidx_t));
+  if((T == NULL) || (SA == NULL)) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Read n bytes of data. */
+  if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) {
+    fprintf(stderr, "%s: %s `%s': ",
+            argv[0],
+            (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+            fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  if(needclose & 1) { fclose(fp); }
+
+  /* Construct the suffix array. */
+  fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n);
+  start = clock();
+  if(divsufsort(T, SA, (saidx_t)n) != 0) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+  finish = clock();
+  fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+  /* Write the suffix array. */
+  if(fwrite(SA, sizeof(saidx_t), (size_t)n, ofp) != (size_t)n) {
+    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  /* Buffered output is only known to have been written once fclose (or
+     fflush, for stdout) succeeds, so its result is checked. */
+  if(((needclose & 2) ? fclose(ofp) : fflush(ofp)) != 0) {
+    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Deallocate memory. */
+  free(SA);
+  free(T);
+
+  return 0;
+}
diff --git a/examples/sasearch.c b/examples/sasearch.c
new file mode 100644
index 000000000000..7e5ca4fe04ed
--- /dev/null
+++ b/examples/sasearch.c
@@ -0,0 +1,165 @@
+/*
+ * sasearch.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_IO_H && HAVE_FCNTL_H
+# include <io.h>
+# include <fcntl.h>
+#endif
+#include <divsufsort.h>
+#include "lfs.h"
+
+
+/*
+ * Prints the sasearch banner (with the library version) and the usage
+ * line to stderr, then terminates the process with the given exit status.
+ */
+static
+void
+print_help(const char *progname, int status) {
+  FILE *out = stderr;
+  const char *version = divsufsort_version();
+
+  fprintf(out,
+          "sasearch, a simple SA-based full-text search tool, version %s\n",
+          version);
+  fprintf(out, "usage: %s PATTERN FILE SAFILE\n\n", progname);
+  exit(status);
+}
+
+/*
+ * sasearch: loads FILE and its suffix array SAFILE (n raw saidx_t values,
+ * where n is the size of FILE), calls sa_search for PATTERN and prints
+ * the suffix-array entries of the reported range to stdout, one per line.
+ *
+ * Every failure prints a message to stderr and exits with EXIT_FAILURE;
+ * success returns 0.
+ */
+int
+main(int argc, const char *argv[]) {
+  FILE *fp;
+  const char *P;
+  sauchar_t *T;
+  saidx_t *SA;
+  LFS_OFF_T n;
+  size_t Psize, nalloc;
+  saidx_t i, size, left;
+
+  if((argc == 1) ||
+     (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
+  if(argc != 4) { print_help(argv[0], EXIT_FAILURE); }
+
+  P = argv[1];
+  Psize = strlen(P);
+
+  /* Open a file for reading. */
+#if HAVE_FOPEN_S
+  if(fopen_s(&fp, argv[2], "rb") != 0) {
+#else
+  if((fp = LFS_FOPEN(argv[2], "rb")) == NULL) {
+#endif
+    fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[2]);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Get the file size. */
+  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
+    n = LFS_FTELL(fp);
+    rewind(fp);
+    if(n < 0) {
+      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], argv[2]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+    /* n is later narrowed to saidx_t; refuse sizes that need more than
+       31 bits, as the other example programs do. */
+    if(0x7fffffff <= n) {
+      fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], argv[2]);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], argv[2]);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Allocate the text and suffix-array buffers (n elements each).  At
+     least one element is requested because malloc(0) may return NULL,
+     which would be misreported as an allocation failure for an empty
+     input. */
+  nalloc = (0 < n) ? (size_t)n : 1;
+  T = (sauchar_t *)malloc(nalloc * sizeof(sauchar_t));
+  SA = (saidx_t *)malloc(nalloc * sizeof(saidx_t));
+  if((T == NULL) || (SA == NULL)) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Read n bytes of data. */
+  if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) {
+    fprintf(stderr, "%s: %s `%s': ",
+            argv[0],
+            (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+            argv[2]);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  fclose(fp);
+
+  /* Open the SA file for reading. */
+#if HAVE_FOPEN_S
+  if(fopen_s(&fp, argv[3], "rb") != 0) {
+#else
+  if((fp = LFS_FOPEN(argv[3], "rb")) == NULL) {
+#endif
+    fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[3]);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Read n * sizeof(saidx_t) bytes of data. */
+  if(fread(SA, sizeof(saidx_t), (size_t)n, fp) != (size_t)n) {
+    fprintf(stderr, "%s: %s `%s': ",
+            argv[0],
+            (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+            argv[3]);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  fclose(fp);
+
+  /* Search and print: SA[left .. left + size - 1] is the range reported
+     by sa_search. */
+  size = sa_search(T, (saidx_t)n,
+                   (const sauchar_t *)P, (saidx_t)Psize,
+                   SA, (saidx_t)n, &left);
+  for(i = 0; i < size; ++i) {
+    fprintf(stdout, "%" PRIdSAIDX_T "\n", SA[left + i]);
+  }
+
+  /* Deallocate memory. */
+  free(SA);
+  free(T);
+
+  return 0;
+}
diff --git a/examples/suftest.c b/examples/suftest.c
new file mode 100644
index 000000000000..71892ac172b9
--- /dev/null
+++ b/examples/suftest.c
@@ -0,0 +1,164 @@
+/*
+ * suftest.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_IO_H && HAVE_FCNTL_H
+# include <io.h>
+# include <fcntl.h>
+#endif
+#include <time.h>
+#include <divsufsort.h>
+#include "lfs.h"
+
+
+/*
+ * Prints the suftest banner (with the library version) and the usage line
+ * to stderr, then terminates the process with the given exit status.
+ */
+static
+void
+print_help(const char *progname, int status) {
+  FILE *out = stderr;
+  const char *version = divsufsort_version();
+
+  fprintf(out,
+          "suftest, a suffixsort tester, version %s.\n",
+          version);
+  fprintf(out, "usage: %s FILE\n\n", progname);
+  exit(status);
+}
+
+/*
+ * suftest: reads all of FILE into memory, builds its suffix array with
+ * divsufsort (reporting the elapsed CPU time) and passes the result to
+ * sufcheck.
+ *
+ * "-" as FILE selects stdin; the input must be seekable because its size
+ * is determined with fseek/ftell.  Every failure exits with EXIT_FAILURE;
+ * success returns 0.
+ */
+int
+main(int argc, const char *argv[]) {
+  FILE *fp;
+  const char *fname;
+  sauchar_t *T;
+  saidx_t *SA;
+  LFS_OFF_T n;
+  size_t nalloc;
+  clock_t start, finish;
+  saint_t needclose = 1;
+
+  /* Check arguments. */
+  if((argc == 1) ||
+     (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
+  if(argc != 2) { print_help(argv[0], EXIT_FAILURE); }
+
+  /* Open a file for reading. */
+  if(strcmp(argv[1], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
+#else
+    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    fp = stdin;
+    fname = "stdin";
+    needclose = 0; /* stdin is not ours to close */
+  }
+
+  /* Get the file size. */
+  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
+    n = LFS_FTELL(fp);
+    rewind(fp);
+    if(n < 0) {
+      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+    /* n is later narrowed to saidx_t; refuse sizes that need more than
+       31 bits. */
+    if(0x7fffffff <= n) {
+      fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Allocate the text and suffix-array buffers (n elements each).  At
+     least one element is requested because malloc(0) may return NULL,
+     which would be misreported as an allocation failure for an empty
+     input. */
+  nalloc = (0 < n) ? (size_t)n : 1;
+  T = (sauchar_t *)malloc(nalloc * sizeof(sauchar_t));
+  SA = (saidx_t *)malloc(nalloc * sizeof(saidx_t));
+  if((T == NULL) || (SA == NULL)) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Read n bytes of data.  The message names the stream via fname so that
+     stdin is reported as "stdin" rather than "-". */
+  if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) {
+    fprintf(stderr, "%s: %s `%s': ",
+            argv[0],
+            (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+            fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  if(needclose & 1) { fclose(fp); }
+
+  /* Construct the suffix array. */
+  fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n);
+  start = clock();
+  if(divsufsort(T, SA, (saidx_t)n) != 0) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+  finish = clock();
+  fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+  /* Check the suffix array (last argument presumably enables verbose
+     output -- confirm against divsufsort.h). */
+  if(sufcheck(T, SA, (saidx_t)n, 1) != 0) { exit(EXIT_FAILURE); }
+
+  /* Deallocate memory. */
+  free(SA);
+  free(T);
+
+  return 0;
+}
diff --git a/examples/unbwt.c b/examples/unbwt.c
new file mode 100644
index 000000000000..c0f19e97ad0f
--- /dev/null
+++ b/examples/unbwt.c
@@ -0,0 +1,207 @@
+/*
+ * unbwt.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_IO_H && HAVE_FCNTL_H
+# include <io.h>
+# include <fcntl.h>
+#endif
+#include <time.h>
+#include <divsufsort.h>
+#include "lfs.h"
+
+
+/*
+ * Reads a 4-byte little-endian two's-complement integer from fp into *n.
+ *
+ * Returns the number of bytes fread delivered; *n is written only when
+ * all four bytes were read.  The value is assembled in unsigned
+ * arithmetic because shifting a byte >= 0x80 left by 24 in (signed) int
+ * overflows, which is undefined behaviour.
+ */
+static
+size_t
+read_int(FILE *fp, saidx_t *n) {
+  unsigned char c[4];
+  unsigned long v;
+  size_t m = fread(c, sizeof(unsigned char), 4, fp);
+  if(m == 4) {
+    v = ((unsigned long)c[0] << 0) | ((unsigned long)c[1] << 8) |
+        ((unsigned long)c[2] << 16) | ((unsigned long)c[3] << 24);
+    if(v & 0x80000000UL) {
+      /* Sign bit set: rebuild the negative value without overflowing. */
+      *n = (saidx_t)(-(long)(~v & 0x7fffffffUL) - 1);
+    } else {
+      *n = (saidx_t)v;
+    }
+  }
+  return m;
+}
+
+/*
+ * Prints the unbwt banner (with the library version) and the usage line
+ * to stderr, then terminates the process with the given exit status.
+ */
+static
+void
+print_help(const char *progname, int status) {
+  FILE *out = stderr;
+  const char *version = divsufsort_version();
+
+  fprintf(out,
+          "unbwt, an inverse burrows-wheeler transform program, version %s.\n",
+          version);
+  fprintf(out, "usage: %s INFILE OUTFILE\n\n", progname);
+  exit(status);
+}
+
+/*
+ * unbwt: reverses the stream layout read below -- a 4-byte little-endian
+ * block size, then per block a 4-byte little-endian index followed by the
+ * block's bytes -- by running inverse_bw_transform on every block and
+ * writing the restored bytes to OUTFILE.
+ *
+ * "-" as INFILE/OUTFILE selects stdin/stdout.  Every failure prints a
+ * message to stderr and exits with EXIT_FAILURE; success returns 0.
+ */
+int
+main(int argc, const char *argv[]) {
+  FILE *fp, *ofp;
+  const char *fname, *ofname;
+  sauchar_t *T;
+  saidx_t *A;
+  LFS_OFF_T n;
+  size_t m, nalloc;
+  saidx_t pidx, bsize;
+  clock_t start, finish;
+  saint_t err, blocksize, needclose = 3;
+
+  /* Check arguments. */
+  if((argc == 1) ||
+     (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
+  if(argc != 3) { print_help(argv[0], EXIT_FAILURE); }
+
+  /* Open a file for reading. */
+  if(strcmp(argv[1], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
+#else
+    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    fp = stdin;
+    fname = "stdin";
+    needclose ^= 1; /* stdin is not ours to close */
+  }
+
+  /* Open a file for writing. */
+  if(strcmp(argv[2], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) {
+#else
+    if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    ofp = stdout;
+    ofname = "stdout";
+    needclose ^= 2; /* stdout is not ours to close */
+  }
+
+  /* Read the blocksize.  read_int stores through a saidx_t pointer, so
+     read into a saidx_t and narrow afterwards instead of passing the
+     address of a saint_t. */
+  if(read_int(fp, &bsize) != 4) {
+    fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  /* The value comes from the input file: a negative size would turn into
+     a huge size_t in the malloc/fread calls below. */
+  if(bsize < 0) {
+    fprintf(stderr, "%s: Invalid data in `%s'.\n", argv[0], fname);
+    exit(EXIT_FAILURE);
+  }
+  blocksize = (saint_t)bsize;
+
+  /* Allocate the block and work buffers (blocksize elements each).  At
+     least one element is requested because malloc(0) may return NULL,
+     which would be misreported as an allocation failure when the stored
+     block size is zero. */
+  nalloc = (0 < blocksize) ? (size_t)blocksize : 1;
+  T = (sauchar_t *)malloc(nalloc * sizeof(sauchar_t));
+  A = (saidx_t *)malloc(nalloc * sizeof(saidx_t));
+  if((T == NULL) || (A == NULL)) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  fprintf(stderr, "UnBWT (blocksize %" PRIdSAINT_T ") ... ", blocksize);
+  start = clock();
+  /* m is reused: first the header byte count, then the block's data byte
+     count, so n accumulates restored data bytes only. */
+  for(n = 0; (m = read_int(fp, &pidx)) != 0; n += m) {
+    /* Read blocksize bytes of data. */
+    if((m != 4) || ((m = fread(T, sizeof(sauchar_t), blocksize, fp)) == 0)) {
+      fprintf(stderr, "%s: %s `%s': ",
+              argv[0],
+              (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+              fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+
+    /* Inverse Burrows-Wheeler Transform (in place). */
+    if((err = inverse_bw_transform(T, T, A, m, pidx)) != 0) {
+      fprintf(stderr, "%s (reverseBWT): %s.\n",
+              argv[0],
+              (err == -1) ? "Invalid data" : "Cannot allocate memory");
+      exit(EXIT_FAILURE);
+    }
+
+    /* Write m bytes of data. */
+    if(fwrite(T, sizeof(sauchar_t), m, ofp) != m) {
+      fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  }
+  if(ferror(fp)) {
+    fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  finish = clock();
+  fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n",
+          n, (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+  /* Close files.  Buffered output is only known to have been written once
+     fclose (or fflush, for stdout) succeeds, so its result is checked. */
+  if(needclose & 1) { fclose(fp); }
+  if(((needclose & 2) ? fclose(ofp) : fflush(ofp)) != 0) {
+    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Deallocate memory. */
+  free(A);
+  free(T);
+
+  return 0;
+}