Main Page   Alphabetical List   Data Structures   File List   Data Fields   Globals  

csvtable.c

Go to the documentation of this file.
00001 
00019 #ifdef STANDALONE
00020 #include <sqlite3.h>
00021 #else
00022 #include <sqlite3ext.h>
00023 static SQLITE_EXTENSION_INIT1
00024 #endif
00025 
00026 #include <stdio.h>
00027 #include <stdlib.h>
00028 #include <string.h>
00029 #include <limits.h>
00030 
00031 #ifdef _WIN32
00032 #include <windows.h>
00033 #define strcasecmp  _stricmp
00034 #define strncasecmp _strnicmp
00035 #endif
00036 
00043 typedef struct csv_file {
00044     FILE *f;            
00045     char *sep;          
00046     char *quot;         
00047     int isdos;          
00048     int maxl;           
00049     char *line;         
00050     long pos0;          
00051     int maxc;           
00052     int ncols;          
00053     char **cols;        
00054 } csv_file;
00055 
00062 typedef struct csv_guess_fmt {
00063     int nlines;
00064     int hist[256];
00065 } csv_guess_fmt;
00066 
00073 typedef struct csv_vtab {
00074     sqlite3_vtab vtab;  
00075     csv_file *csv;      
00076     int convert;        
00077     char coltypes[1];   
00078 } csv_vtab;
00079 
00086 typedef struct {
00087     sqlite3_vtab_cursor cursor;         
00088     long pos;                           
00089 } csv_cursor;
00090 
00096 static void
00097 append_free(char **in)
00098 {
00099     long *p = (long *) *in;
00100 
00101     if (p) {
00102         p -= 2;
00103         sqlite3_free(p);
00104         *in = 0;
00105     }
00106 }
00107 
00117 static char *
00118 append(char **in, char const *append, char quote)
00119 {
00120     long *p = (long *) *in;
00121     long len, maxlen, actlen;
00122     int i;
00123     char *pp;
00124     int nappend = append ? strlen(append) : 0;
00125 
00126     if (p) {
00127         p -= 2;
00128         maxlen = p[0];
00129         actlen = p[1];
00130     } else {
00131         maxlen = actlen = 0;
00132     }
00133     len = nappend + actlen;
00134     if (quote) {
00135         len += 2;
00136         for (i = 0; i < nappend; i++) {
00137             if (append[i] == quote) {
00138                 len++;
00139             }
00140         }
00141     } else if (!nappend) {
00142         return *in;
00143     }
00144     if (len >= maxlen - 1) {
00145         long *q;
00146 
00147         maxlen = (len + 0x03ff) & (~0x3ff);
00148         q = (long *) sqlite3_realloc(p, maxlen + 1 + 2 * sizeof (long));
00149         if (!q) {
00150             return 0;
00151         }
00152         if (!p) {
00153             q[1] = 0;
00154         }
00155         p = q;
00156         p[0] = maxlen;
00157         *in = (char *) (p + 2);
00158     }
00159     pp = *in + actlen;
00160     if (quote) {
00161         *pp++ = quote;
00162         for (i = 0; i < nappend; i++) {
00163             *pp++ = append[i];
00164             if (append[i] == quote) {
00165                 *pp++ = quote;
00166             }
00167         }
00168         *pp++ = quote;
00169         *pp = '\0';
00170     } else {
00171         if (nappend) {
00172             memcpy(pp, append, nappend);
00173             pp += nappend;
00174             *pp = '\0';
00175         }
00176     }
00177     p[1] = pp - *in;
00178     return *in;
00179 }
00180 
00187 static char *
00188 unquote(char const *in)
00189 {
00190     char c, *ret;
00191     int i;
00192 
00193     ret = sqlite3_malloc(strlen(in) + 1);
00194     if (ret) {
00195         c = in[0];
00196         if ((c == '"') || (c == '\'')) {
00197             i = strlen(in + 1);
00198             if ((i > 0) && (in[i] == c)) {
00199                 strcpy(ret, in + 1);
00200                 ret[i - 1] = '\0';
00201                 return ret;
00202             }
00203         }
00204         strcpy(ret, in);
00205     }
00206     return ret;
00207 }
00208 
00215 static int
00216 maptype(char const *type)
00217 {
00218     int typelen = type ? strlen(type) : 0;
00219 
00220     if ((typelen >= 3) &&
00221         (strncasecmp(type, "integer", 7) == 0)) {
00222         return SQLITE_INTEGER;
00223     }
00224     if ((typelen >= 6) &&
00225         (strncasecmp(type, "double", 6) == 0)) {
00226         return SQLITE_FLOAT;
00227     }
00228     if ((typelen >= 5) &&
00229         (strncasecmp(type, "float", 5) == 0)) {
00230         return SQLITE_FLOAT;
00231     }
00232     if ((typelen >= 4) &&
00233         (strncasecmp(type, "real", 4) == 0)) {
00234         return SQLITE_FLOAT;
00235     }
00236     return SQLITE_TEXT;
00237 }
00238 
00245 static void
00246 conv_names(char **names, int ncols)
00247 {
00248     int i;
00249     char *p, *q;
00250     static const char ws[] = "\n\t\r\b\v ";
00251 
00252     if (!names || ncols <= 0) {
00253         return;
00254     }
00255     for (i = 0; i < ncols; i++) {
00256         p = names[i];
00257 
00258         while (*p) {
00259             if (strchr(ws, *p)) {
00260                 *p++ = '_';
00261                 q = p;
00262                 while (*q && strchr(ws, *q)) {
00263                     ++q;
00264                 }
00265                 if (*q && q > p) {
00266                     strcpy(p, q);
00267                 }
00268                 continue;
00269             }
00270             ++p;
00271         }
00272     }
00273 }
00274 
00285 static void
00286 result_or_bind(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
00287                char *data, int len, int type)
00288 {
00289     char *endp;
00290 
00291     if (!data) {
00292         if (ctx) {
00293             sqlite3_result_null(ctx);
00294         } else {
00295             sqlite3_bind_null(stmt, idx);
00296         }
00297         return;
00298     }
00299     if (type == SQLITE_INTEGER) {
00300         sqlite_int64 val;
00301 #if defined(_WIN32) || defined(_WIN64)
00302         char endc;
00303 
00304         if (sscanf(data, "%I64d%c", &val, &endc) == 1) {
00305             if (ctx) {
00306                 sqlite3_result_int64(ctx, val);
00307             } else {
00308                 sqlite3_bind_int64(stmt, idx, val);
00309             }
00310             return;
00311         }
00312 #else
00313         endp = 0;
00314 #ifdef __osf__
00315         val = strtol(data, &endp, 0);
00316 #else
00317         val = strtoll(data, &endp, 0);
00318 #endif
00319         if (endp && (endp != data) && !*endp) {
00320             if (ctx) {
00321                 sqlite3_result_int64(ctx, val);
00322             } else {
00323                 sqlite3_bind_int64(stmt, idx, val);
00324             }
00325             return;
00326         }
00327 #endif
00328     } else if (type == SQLITE_FLOAT) {
00329         double val;
00330 
00331         endp = 0;
00332         val = strtod(data, &endp);
00333         if (endp && (endp != data) && !*endp) {
00334             if (ctx) {
00335                 sqlite3_result_double(ctx, val);
00336             } else {
00337                 sqlite3_bind_double(stmt, idx, val);
00338             }
00339             return;
00340         }
00341     }
00342     if (ctx) {
00343         sqlite3_result_text(ctx, data, len, SQLITE_TRANSIENT);
00344     } else {
00345         sqlite3_bind_text(stmt, idx, data, len, SQLITE_TRANSIENT);
00346     }
00347 }
00348 
00359 static int
00360 process_col(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
00361             char *data, int type, int conv)
00362 {
00363     char c, *p;
00364     const char flchars[] = "Ee+-.,0123456789";
00365 
00366     if (!data) {
00367         goto putdata;
00368     }
00369 
00370     /*
00371      * Floating point number test,
00372      * converts single comma to dot.
00373      */
00374     c = data[0];
00375     if ((c != '\0') && strchr(flchars + 2, c)) {
00376         p = data + 1;
00377         while (*p && strchr(flchars, *p)) {
00378             ++p;
00379         }
00380         if (*p == '\0') {
00381             char *first = 0;
00382             int n = 0;
00383 
00384             p = data;
00385             while (p) {
00386                 p = strchr(p, ',');
00387                 if (!p) {
00388                     break;
00389                 }
00390                 if (++n == 1) {
00391                     first = p;
00392                 }
00393                 ++p;
00394             }
00395             if (first) {
00396                 *first = '.';
00397                 goto putdata;
00398             }
00399         }
00400     }
00401     if (conv) {
00402         char *utf = sqlite3_malloc(strlen(data) * 2 + 2);
00403 
00404         if (utf) {
00405             p = utf;
00406             while ((c = *data) != '\0') {
00407                 if (((conv & 10) == 10) && (c == '\\')) {
00408                     if (data[1] == 'q') {
00409                         *p++ = '\'';
00410                         data += 2;
00411                         continue;
00412                     }
00413                 }
00414                 if ((conv & 2) && (c == '\\')) {
00415                     char c2 = data[1];
00416 
00417                     switch (c2) {
00418                     case '\0':
00419                         goto convdone;
00420                     case 'n':
00421                         *p = '\n';
00422                         break;
00423                     case 't':
00424                         *p = '\t';
00425                         break;
00426                     case 'r':
00427                         *p = '\r';
00428                         break;
00429                     case 'f':
00430                         *p = '\f';
00431                         break;
00432                     case 'v':
00433                         *p = '\v';
00434                         break;
00435                     case 'b':
00436                         *p = '\b';
00437                         break;
00438                     case 'a':
00439                         *p = '\a';
00440                         break;
00441                     case '?':
00442                         *p = '\?';
00443                         break;
00444                     case '\'':
00445                         *p = '\'';
00446                         break;
00447                     case '"':
00448                         *p = '\"';
00449                         break;
00450                     case '\\':
00451                         *p = '\\';
00452                         break;
00453                     default:
00454                         *p++ = c;
00455                         *p = c2;
00456                         break;
00457                     }
00458                     p++;
00459                     data += 2;
00460                     continue;
00461                 }
00462                 if ((conv & 1) && (c & 0x80)) {
00463                     *p++ = 0xc0 | ((c >> 6) & 0x1f);
00464                     *p++ = 0x80 | (c & 0x3f);
00465                 } else {
00466                     *p++ = c;
00467                 }
00468                 data++;
00469             }
00470 convdone:
00471             *p = '\0';
00472             result_or_bind(ctx, stmt, idx, utf, p - utf, type);
00473             sqlite3_free(utf);
00474             return SQLITE_OK;
00475         } else {
00476             if (ctx) {
00477                 sqlite3_result_error(ctx, "out of memory", -1);
00478             }
00479             return SQLITE_NOMEM;
00480         }
00481     }
00482 putdata:
00483     result_or_bind(ctx, stmt, idx, data, -1, type);
00484     return SQLITE_OK;
00485 }
00486 
00495 static csv_file *
00496 csv_open(const char *filename, const char *sep, const char *quot)
00497 {
00498     FILE *f;
00499     csv_file *csv;
00500 
00501 #ifdef _WIN32
00502     f = fopen(filename, "rb");
00503 #else
00504     f = fopen(filename, "r");
00505 #endif
00506     if (!f) {
00507         return 0;
00508     }
00509     csv = sqlite3_malloc(sizeof (csv_file));
00510     if (!csv) {
00511 error0:
00512         fclose(f);
00513         return 0;
00514     }
00515     csv->f = f;
00516     if (sep && sep[0]) {
00517         csv->sep = sqlite3_malloc(strlen(sep) + 1);
00518         if (!csv->sep) {
00519 error1:
00520             sqlite3_free(csv);
00521             goto error0;
00522         }
00523         strcpy(csv->sep, sep);
00524     } else {
00525         csv->sep = 0;
00526     }
00527     if (quot && quot[0]) {
00528         csv->quot = sqlite3_malloc(strlen(quot) + 1);
00529         if (!csv->quot) {
00530             if (csv->sep) {
00531                 sqlite3_free(csv->sep);
00532             }
00533             goto error1;
00534         }
00535         strcpy(csv->quot, quot);
00536     } else {
00537         csv->quot = 0;
00538     }
00539     csv->isdos = 0;
00540     csv->maxl = 0;
00541     csv->line = 0;
00542     csv->pos0 = 0;
00543     csv->maxc = 0;
00544     csv->ncols = 0;
00545     csv->cols = 0;
00546     return csv;
00547 }
00548 
00554 static void
00555 csv_close(csv_file *csv)
00556 {
00557     if (csv) {
00558         if (csv->sep) {
00559             sqlite3_free(csv->sep);
00560         }
00561         if (csv->quot) {
00562             sqlite3_free(csv->quot);
00563         }
00564         if (csv->line) {
00565             sqlite3_free(csv->line);
00566         }
00567         if (csv->cols) {
00568             sqlite3_free(csv->cols);
00569         }
00570         if (csv->f) {
00571             fclose(csv->f);
00572         }
00573         sqlite3_free(csv);
00574     }
00575 }
00576 
00583 static int
00584 csv_eof(csv_file *csv)
00585 {
00586     if (csv && csv->f) {
00587         return feof(csv->f);
00588     }
00589     return 1;
00590 }
00591 
00599 static long
00600 csv_seek(csv_file *csv, long pos)
00601 {
00602     if (csv && csv->f) {
00603         return fseek(csv->f, pos, SEEK_SET);
00604     }
00605     return EOF;
00606 }
00607 
00613 static void
00614 csv_rewind(csv_file *csv)
00615 {
00616     if (csv && csv->f) {
00617         csv_seek(csv, csv->pos0);
00618     }
00619 }
00620 
00627 static long
00628 csv_tell(csv_file *csv)
00629 {
00630     if (csv && csv->f) {
00631         return ftell(csv->f);
00632     }
00633     return EOF;
00634 }
00635 
00643 static int
00644 csv_getline(csv_file *csv, csv_guess_fmt *guess)
00645 {
00646     int i, index = 0, inq = 0, c, col;
00647     char *p, *sep;
00648 
00649     if (!csv || !csv->f) {
00650         return EOF;
00651     }
00652     while (1) {
00653         c = fgetc(csv->f);
00654         if (c == EOF) {
00655             if (index > 0) {
00656                 break;
00657             }
00658             return EOF;
00659         }
00660         if (c == '\0') {
00661             continue;
00662         }
00663         if (c == '\r') {
00664             int c2 = fgetc(csv->f);
00665             c = '\n';
00666 
00667             if (c2 == '\n') {
00668                 csv->isdos = 1;
00669             } else if (c2 != EOF) {
00670                 ungetc(c2, csv->f);
00671             }
00672         }
00673         /* check for DOS EOF (Ctrl-Z) */
00674         if (csv->isdos && (c == '\032')) {
00675             int c2 = fgetc(csv->f);
00676 
00677             if (c2 == EOF) {
00678                 if (index > 0) {
00679                     break;
00680                 }
00681                 return EOF;
00682             }
00683             ungetc(c2, csv->f);
00684         }
00685         if (index >= csv->maxl - 1) {
00686             int n = csv->maxl * 2;
00687             char *line;
00688 
00689             if (n <= 0) {
00690                 n = 4096;
00691             }
00692             line = sqlite3_malloc(n);
00693             if (!line) {
00694                 return EOF;
00695             }
00696             if (csv->line) {
00697                 memcpy(line, csv->line, index);
00698                 sqlite3_free(csv->line);
00699             }
00700             csv->maxl = n;
00701             csv->line = line;
00702         }
00703         csv->line[index++] = c;
00704         if (csv->quot && (p = strchr(csv->quot, c))) {
00705             if (inq) {
00706                 if (*p == inq) {
00707                     inq = 0;
00708                 }
00709             } else {
00710                 inq = *p;
00711             }
00712         }
00713         if (!inq && (c == '\n')) {
00714             break;
00715         }
00716     }
00717     if (guess) {
00718         for (i = 0; i < index; i++) {
00719             guess->hist[csv->line[i] & 0xFF] += 1;
00720         }
00721         guess->nlines += 1;
00722         csv->ncols = 0;
00723         return 0;
00724     }
00725 
00726     for (i = index - 1; i >= 0; i--) {
00727         if (csv->line[i] != '\n') {
00728             break;
00729         }
00730     }
00731     index = i + 1;
00732     csv->line[index] = '\0';
00733     i = inq = col = 0;
00734     sep = csv->sep ? csv->sep : ";";
00735     if (!csv->cols) {
00736         int n = 128;
00737 
00738         csv->cols = sqlite3_malloc(sizeof (char *) * n);
00739         if (!csv->cols) {
00740             return EOF;
00741         }
00742         csv->maxc = n;
00743     }
00744     csv->cols[col++] = csv->line;
00745     while (i < index) {
00746         if (csv->quot && (p = strchr(csv->quot, csv->line[i]))) {
00747             if (inq) {
00748                 if (*p == inq) {
00749                     inq = 0;
00750                 }
00751             } else {
00752                 inq = *p;
00753             }
00754         }
00755         if (!inq && (p = strchr(sep, csv->line[i]))) {
00756             p = csv->line + i;
00757             *p = '\0';
00758             if (col >= csv->maxc) {
00759                 int n = csv->maxc * 2;
00760                 char **cols;
00761 
00762                 cols = sqlite3_realloc(csv->cols, sizeof (char *) * n);
00763                 if (!cols) {
00764                     return EOF;
00765                 }
00766                 csv->cols = cols;
00767                 csv->maxc = n;
00768             }
00769             csv->cols[col++] = p + 1;
00770         }
00771         ++i;
00772     }
00773     csv->ncols = col;
00774 
00775     /* strip off quotes */
00776     if (csv->quot) {
00777         for (i = 0; i < col; i++) {
00778             if (*csv->cols[i]) {
00779                 p = strchr(csv->quot, *csv->cols[i]);
00780                 if (p) {
00781                     char *src, *dst;
00782 
00783                     c = *p;
00784                     csv->cols[i] += 1;
00785                     sep = csv->cols[i] + strlen(csv->cols[i]) - 1;
00786                     if ((sep >= csv->cols[i]) && (*sep == c)) {
00787                         *sep = '\0';
00788                     }
00789                     /* collapse quote escape sequences */
00790                     src = csv->cols[i];
00791                     dst = 0;
00792                     while (*src) {
00793                         if ((*src == c) && (src[1] == c)) {
00794                             if (!dst) {
00795                                 dst = src;
00796                             }
00797                             src++;
00798                             while (*src) {
00799                                 *dst++ = *src++;
00800                                 if (*src == c) {
00801                                     --src;
00802                                     break;
00803                                 }
00804                             }
00805                         }
00806                         ++src;
00807                     }
00808                     if (dst) {
00809                         *dst++ = '\0';
00810                     }
00811                 }
00812             }
00813         }
00814     }
00815     return col;
00816 }
00817 
00824 static int
00825 csv_ncols(csv_file *csv)
00826 {
00827     if (csv && csv->cols) {
00828         return csv->ncols;
00829     }
00830     return 0;
00831 }
00832 
00840 static char *
00841 csv_coldata(csv_file *csv, int n)
00842 {
00843     if (csv && csv->cols && (n >= 0) && (n < csv->ncols)) {
00844         return csv->cols[n];
00845     }
00846     return 0;
00847 }
00848 
00855 static int
00856 csv_guess(csv_file *csv)
00857 {
00858     csv_guess_fmt guess;
00859     int i, n;
00860     char *p, sep[32], quot[4];
00861     const struct {
00862         int c;
00863         int min;
00864     } sep_test[] = {
00865         { ',', 2 },
00866         { ';', 2 },
00867         { '\t', 2 },
00868         { ' ', 4 },
00869         { '|', 2 }
00870     };
00871 
00872     if (!csv) {
00873         return EOF;
00874     }
00875     memset(&guess, 0, sizeof (guess));
00876     csv->pos0 = 0;
00877     csv_rewind(csv);
00878     for (i = n = 0; i < 10; i++) {
00879         n = csv_getline(csv, &guess);
00880         if (n == EOF) {
00881             break;
00882         }
00883     }
00884     csv_rewind(csv);
00885     if (n && !i) {
00886         return EOF;
00887     }
00888     p = quot;
00889     n = '"';
00890     if (guess.hist[n] > 1) {
00891         *p++ = n;
00892     }
00893     n = '\'';
00894     if (guess.hist[n] > 1) {
00895         *p++ = n;
00896     }
00897     *p = '\0';
00898     p = sep;
00899     for (i = 0; i < sizeof (sep_test) / sizeof (sep_test[0]); i++) {
00900         if (guess.hist[sep_test[i].c] > sep_test[i].min * guess.nlines) {
00901             *p++ = sep_test[i].c;
00902         }
00903     }
00904     *p = '\0';
00905     if (quot[0]) {
00906         p = sqlite3_malloc(strlen(quot) + 1);
00907         if (p) {
00908             strcpy(p, quot);
00909             if (csv->quot) {
00910                 sqlite3_free(csv->quot);
00911             }
00912             csv->quot = p;
00913         } else {
00914             return EOF;
00915         }
00916     }
00917     if (sep[0]) {
00918         p = sqlite3_malloc(strlen(sep) + 1);
00919         if (p) {
00920             strcpy(p, sep);
00921             if (csv->sep) {
00922                 sqlite3_free(csv->sep);
00923             }
00924             csv->sep = p;
00925         } else {
00926             return EOF;
00927         }
00928     }
00929     return 0;
00930 }
00931 
00965 static int
00966 csv_vtab_connect(sqlite3* db, void *aux, int argc, const char * const *argv,
00967                  sqlite3_vtab **vtabp, char **errp)
00968 {
00969     csv_file *csv;
00970     int rc = SQLITE_ERROR, i, ncnames, row1;
00971     char **cnames, *schema = 0, **nargv;
00972     csv_vtab *vtab;
00973 
00974     if (argc < 4) {
00975         *errp = sqlite3_mprintf("input file name missing");
00976         return SQLITE_ERROR;
00977     }
00978     nargv = sqlite3_malloc(sizeof (char *) * argc);
00979     memset(nargv, 0, sizeof (char *) * argc);
00980     for (i = 3; i < argc; i++) {
00981         nargv[i] = unquote(argv[i]);
00982     }
00983     csv = csv_open(nargv[3], (argc > 6) ? nargv[6] : 0,
00984                    (argc > 7) ? nargv[7] : 0);
00985     if (!csv) {
00986         *errp = sqlite3_mprintf("unable to open input file");
00987 cleanup:
00988         append_free(&schema);
00989         for (i = 3; i < argc; i++) {
00990             if (nargv[i]) {
00991                 sqlite3_free(nargv[i]);
00992             }
00993         }
00994         return rc;
00995     }
00996     if (!csv->sep && !csv->quot) {
00997         csv_guess(csv);
00998     }
00999     csv->pos0 = 0;
01000     row1 = 0;
01001     if (argc > 4) {
01002         row1 = strtol(nargv[4], 0, 10);
01003     }
01004     if (row1) {
01005         /* use column names from 1st row */
01006         csv_getline(csv, 0);
01007         if (csv->ncols < 1) {
01008             csv_close(csv);
01009             *errp = sqlite3_mprintf("unable to get column names");
01010             goto cleanup;
01011         }
01012         csv->pos0 = csv_tell(csv);
01013         csv_rewind(csv);
01014         ncnames = csv_ncols(csv);
01015         cnames = csv->cols;
01016     } else if (argc > 8) {
01017         ncnames = argc - 8;
01018         cnames = (char **) nargv + 8;
01019     } else {
01020         /* use number of columns from 1st row */
01021         csv_getline(csv, 0);
01022         if (csv->ncols < 1) {
01023             csv_close(csv);
01024             *errp = sqlite3_mprintf("unable to get column names");
01025             goto cleanup;
01026         }
01027         csv_rewind(csv);
01028         ncnames = csv_ncols(csv);
01029         cnames = 0;
01030     }
01031     vtab = sqlite3_malloc(sizeof(csv_vtab) + ncnames);
01032     if (!vtab) {
01033         csv_close(csv);
01034         *errp = sqlite3_mprintf("out of memory");
01035         goto cleanup;
01036     }
01037     memset(vtab, 0, sizeof (*vtab));
01038     vtab->convert = 0;
01039     if (argc > 5) {
01040         vtab->convert = strtol(nargv[5], 0, 10);
01041         if (row1 && (vtab->convert & 4)) {
01042             conv_names(cnames, ncnames);
01043         }
01044     }
01045     vtab->csv = csv;
01046     append(&schema, "CREATE TABLE x(", 0);
01047     for (i = 0; i < ncnames; i++) {
01048         vtab->coltypes[i] = SQLITE_TEXT;
01049         if (!cnames || !cnames[i]) {
01050             char colname[64];
01051 
01052             sprintf(colname, "column_%d", i + 1);
01053             append(&schema, colname, '"');
01054         } else if (row1 > 0) {
01055             append(&schema, cnames[i], '"');
01056         } else if (row1 < 0) {
01057             append(&schema, cnames[i], '"');
01058             if (i + 8 < argc) {
01059                 char *type = nargv[i + 8];
01060 
01061                 append(&schema, " ", 0);
01062                 append(&schema, type, 0);
01063                 vtab->coltypes[i] = maptype(type);
01064             }
01065         } else {
01066             char *type = cnames[i];
01067 
01068             append(&schema, cnames[i], 0);
01069             while (*type && !strchr(" \t", *type)) {
01070                 type++;
01071             }
01072             while (*type && strchr(" \t", *type)) {
01073                 type++;
01074             }
01075             vtab->coltypes[i] = maptype(type);
01076         }
01077         if (i < ncnames - 1) {
01078             append(&schema, ",", 0);
01079         }
01080     }
01081     append(&schema, ")", 0);
01082     rc = sqlite3_declare_vtab(db, schema);
01083     if (rc != SQLITE_OK) {
01084         csv_close(csv);
01085         sqlite3_free(vtab);
01086         *errp = sqlite3_mprintf("table definition failed, error %d, "
01087                                 "schema '%s'", rc, schema);
01088         goto cleanup;
01089     }
01090     *vtabp = &vtab->vtab;
01091     *errp = 0;
01092     goto cleanup;
01093 }
01094 
01106 static int
01107 csv_vtab_create(sqlite3* db, void *aux, int argc,
01108            const char *const *argv,
01109            sqlite3_vtab **vtabp, char **errp)
01110 {
01111     return csv_vtab_connect(db, aux, argc, argv, vtabp, errp);
01112 }
01113 
01120 static int
01121 csv_vtab_disconnect(sqlite3_vtab *vtab)
01122 {
01123     csv_vtab *tab = (csv_vtab *) vtab;
01124 
01125     csv_close(tab->csv);
01126     sqlite3_free(tab);
01127     return SQLITE_OK;
01128 }
01129 
01136 static int
01137 csv_vtab_destroy(sqlite3_vtab *vtab)
01138 {
01139     return csv_vtab_disconnect(vtab);
01140 }
01141 
01149 static int
01150 csv_vtab_bestindex(sqlite3_vtab *vtab, sqlite3_index_info *info)
01151 {
01152     return SQLITE_OK;
01153 }
01154 
01162 static int
01163 csv_vtab_open(sqlite3_vtab *vtab, sqlite3_vtab_cursor **cursorp)
01164 {
01165     csv_cursor *cur = sqlite3_malloc(sizeof(*cur));
01166     csv_vtab *tab = (csv_vtab *) vtab;
01167 
01168     if (!cur) {
01169         return SQLITE_ERROR;
01170     }
01171     cur->cursor.pVtab = vtab;
01172     csv_rewind(tab->csv);
01173     cur->pos = csv_tell(tab->csv);
01174     *cursorp = &cur->cursor;
01175     return SQLITE_OK;
01176 }
01177 
01184 static int
01185 csv_vtab_close(sqlite3_vtab_cursor *cursor)
01186 {
01187     sqlite3_free(cursor);
01188     return SQLITE_OK;
01189 }
01190 
01197 static int
01198 csv_vtab_next(sqlite3_vtab_cursor *cursor)
01199 {
01200     csv_cursor *cur = (csv_cursor *) cursor;
01201     csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01202 
01203     cur->pos = csv_tell(tab->csv);
01204     csv_getline(tab->csv, 0);
01205     return SQLITE_OK;
01206 }
01207 
01218 static int
01219 csv_vtab_filter(sqlite3_vtab_cursor *cursor, int idxNum,
01220                 const char *idxStr, int argc, sqlite3_value **argv)
01221 {
01222     csv_cursor *cur = (csv_cursor *) cursor;
01223     csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01224 
01225     csv_rewind(tab->csv);
01226     return csv_vtab_next(cursor);
01227 }
01228 
01235 static int
01236 csv_vtab_eof(sqlite3_vtab_cursor *cursor)
01237 {
01238     csv_cursor *cur = (csv_cursor *) cursor;
01239     csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01240 
01241     return csv_eof(tab->csv);
01242 }
01243 
01252 static int
01253 csv_vtab_column(sqlite3_vtab_cursor *cursor, sqlite3_context *ctx, int n)
01254 {
01255     csv_cursor *cur = (csv_cursor *) cursor;
01256     csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01257     char *data = csv_coldata(tab->csv, n);
01258 
01259     return process_col(ctx, 0, 0, data, tab->coltypes[n], tab->convert);
01260 }
01261 
01269 static int
01270 csv_vtab_rowid(sqlite3_vtab_cursor *cursor, sqlite_int64 *rowidp)
01271 {
01272     csv_cursor *cur = (csv_cursor *) cursor;
01273 
01274     *rowidp = cur->pos;
01275     return SQLITE_OK;
01276 }
01277 
01278 #if (SQLITE_VERSION_NUMBER > 3004000)
01279 
01286 static int
01287 csv_vtab_rename(sqlite3_vtab *vtab, const char *newname)
01288 {
01289     return SQLITE_OK;
01290 }
01291 
01292 #endif
01293 
01298 static const sqlite3_module csv_vtab_mod = {
01299     1,                   /* iVersion */
01300     csv_vtab_create,     /* xCreate */
01301     csv_vtab_connect,    /* xConnect */
01302     csv_vtab_bestindex,  /* xBestIndex */
01303     csv_vtab_disconnect, /* xDisconnect */
01304     csv_vtab_destroy,    /* xDestroy */
01305     csv_vtab_open,       /* xOpen */
01306     csv_vtab_close,      /* xClose */
01307     csv_vtab_filter,     /* xFilter */
01308     csv_vtab_next,       /* xNext */
01309     csv_vtab_eof,        /* xEof */
01310     csv_vtab_column,     /* xColumn */
01311     csv_vtab_rowid,      /* xRowid */
01312     0,                   /* xUpdate */
01313     0,                   /* xBegin */
01314     0,                   /* xSync */
01315     0,                   /* xCommit */
01316     0,                   /* xRollback */
01317     0,                   /* xFindFunction */
01318 #if (SQLITE_VERSION_NUMBER > 3004000)
01319     csv_vtab_rename,     /* xRename */
01320 #endif
01321 };
01322 
01350 static void
01351 csv_import_func(sqlite3_context *ctx, int argc, sqlite3_value **argv)
01352 {
01353     csv_file *csv;
01354     int rc, i, ncnames, row1, convert = 0, useargs = 0;
01355     char *tname, *fname, *sql = 0, **cnames, *coltypes = 0;
01356     sqlite3 *db = (sqlite3 *) sqlite3_user_data(ctx);
01357     sqlite3_stmt *stmt = 0;
01358 
01359     if (argc < 2) {
01360         sqlite3_result_error(ctx, "need at least 2 arguments", -1);
01361         return;
01362     }
01363     tname = (char *) sqlite3_value_text(argv[0]);
01364     if (!tname) {
01365         sqlite3_result_error(ctx, "table name is NULL", -1);
01366         return;
01367     }
01368     fname = (char *) sqlite3_value_text(argv[1]);
01369     if (!fname) {
01370         sqlite3_result_error(ctx, "file name is NULL", -1);
01371         return;
01372     }
01373     csv = csv_open(fname,
01374                    (argc > 4) ? (char *) sqlite3_value_text(argv[4]) : 0,
01375                    (argc > 5) ? (char *) sqlite3_value_text(argv[5]) : 0);
01376     if (!csv) {
01377         sqlite3_result_error(ctx, "unable to open input file", -1);
01378 cleanup:
01379         if (stmt) {
01380             sqlite3_finalize(stmt);
01381         }
01382         append_free(&sql);
01383         if (coltypes) {
01384             sqlite3_free(coltypes);
01385         }
01386         if (csv) {
01387             csv_close(csv);
01388         }
01389         return;
01390     }
01391     if (!csv->sep && !csv->quot) {
01392         csv_guess(csv);
01393     }
01394     csv->pos0 = 0;
01395     row1 = 0;
01396     if (argc > 2) {
01397         row1 = sqlite3_value_int(argv[2]);
01398     }
01399     if (row1) {
01400         /* use column names from 1st row */
01401         csv_getline(csv, 0);
01402         if (csv->ncols < 1) {
01403             sqlite3_result_error(ctx, "unable to get column names", -1);
01404             goto cleanup;
01405         }
01406         csv->pos0 = csv_tell(csv);
01407         csv_rewind(csv);
01408         ncnames = csv_ncols(csv);
01409         cnames = csv->cols;
01410     } else if (argc > 6) {
01411         ncnames = argc - 6;
01412         cnames = 0;
01413         useargs = 1;
01414     } else {
01415         /* use number of columns from 1st row */
01416         csv_getline(csv, 0);
01417         if (csv->ncols < 1) {
01418             sqlite3_result_error(ctx, "unable to get column names", -1);
01419             goto cleanup;
01420         }
01421         csv_rewind(csv);
01422         ncnames = csv_ncols(csv);
01423         cnames = 0;
01424     }
01425     convert = 0;
01426     if (argc > 3) {
01427         convert = sqlite3_value_int(argv[3]);
01428         if (row1 && (convert & 4)) {
01429             conv_names(cnames, ncnames);
01430         }
01431     }
01432     /* test if table exists */
01433     append(&sql, "PRAGMA table_info(", 0);
01434     append(&sql, tname, '"');
01435     append(&sql, ")", 0);
01436     if (!sql) {
01437 oom:
01438         sqlite3_result_error(ctx, "out of memory", -1);
01439         goto cleanup;
01440     }
01441     rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
01442     append_free(&sql);
01443     if (rc != SQLITE_OK) {
01444 prepfail:
01445         sqlite3_result_error(ctx, "prepare failed", -1);
01446         goto cleanup;
01447     }
01448     /* find number of colums */
01449     i = 0;
01450     rc = sqlite3_step(stmt);
01451     while (rc == SQLITE_ROW) {
01452         i++;
01453         rc = sqlite3_step(stmt);
01454     }
01455     if (rc != SQLITE_DONE) {
01456 selfail:
01457         sqlite3_result_error(ctx, "select failed", -1);
01458         goto cleanup;
01459     }
01460     if (i > 0) {
01461         /* get column types */
01462         sqlite3_reset(stmt);
01463         ncnames = i;
01464         coltypes = sqlite3_malloc(ncnames);
01465         if (!coltypes) {
01466             goto oom;
01467         }
01468         rc = sqlite3_step(stmt);
01469         i = 0;
01470         while (rc == SQLITE_ROW) {
01471             coltypes[i++] = maptype((char *) sqlite3_column_text(stmt, 2));
01472             rc = sqlite3_step(stmt);
01473         }
01474         if (rc != SQLITE_DONE) {
01475             goto selfail;
01476         }
01477     } else {
01478         /* create new table */
01479         sqlite3_finalize(stmt);
01480         stmt = 0;
01481         coltypes = sqlite3_malloc(ncnames);
01482         if (!coltypes) {
01483             goto oom;
01484         }
01485         append(&sql, "CREATE TABLE ", 0);
01486         append(&sql, tname, '"');
01487         append(&sql, "(", 0);
01488         for (i = 0; i < ncnames; i++) {
01489             char colname[64];
01490 
01491             coltypes[i] = SQLITE_TEXT;
01492             if (useargs) {
01493                 char *type = (char *) sqlite3_value_text(argv[i + 6]);
01494 
01495                 if (!type) {
01496                     goto defcol;
01497                 }
01498                 append(&sql, type, 0);
01499                 while (*type && !strchr(" \t", *type)) {
01500                     type++;
01501                 }
01502                 while (*type && strchr(" \t", *type)) {
01503                     type++;
01504                 }
01505                 coltypes[i] = maptype(type);
01506             } else if (!cnames || !cnames[i]) {
01507 defcol:
01508                 sprintf(colname, "column_%d", i + 1);
01509                 append(&sql, colname, '"');
01510             } else if (row1 > 0) {
01511                 append(&sql, cnames[i], '"');
01512             } else if (row1 < 0) {
01513                 append(&sql, cnames[i], '"');
01514                 if (i + 6 < argc) {
01515                     char *type = (char *) sqlite3_value_text(argv[i + 6]);
01516 
01517                     if (type) {
01518                         append(&sql, " ", 0);
01519                         append(&sql, type, 0);
01520                         coltypes[i] = maptype(type);
01521                     }
01522                 }
01523             }
01524             if (i < ncnames - 1) {
01525                 append(&sql, ",", 0);
01526             }
01527         }
01528         append(&sql, ")", 0);
01529         rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
01530         if (rc != SQLITE_OK) {
01531             goto prepfail;
01532         }
01533         rc = sqlite3_step(stmt);
01534         if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
01535             sqlite3_result_error(ctx, "create table failed", -1);
01536             goto cleanup;
01537         }
01538         append_free(&sql);
01539     }
01540     sqlite3_finalize(stmt);
01541     stmt = 0;
01542     /* make INSERT statement */
01543     append(&sql, "INSERT INTO ", 0);
01544     append(&sql, tname, '"');
01545     append(&sql, " VALUES(", 0);
01546     for (i = 0; i < ncnames; i++) {
01547         append(&sql, (i < ncnames - 1) ? "?," : "?)", 0);
01548     }
01549     rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
01550     if (rc != SQLITE_OK) {
01551         goto prepfail;
01552     }
01553     append_free(&sql);
01554     /* import the CSV file */
01555     row1 = 0;
01556     while (csv_getline(csv, 0) != EOF) {
01557         for (i = 0; i < ncnames; i++) {
01558             char *data = csv_coldata(csv, i);
01559 
01560             rc = process_col(0, stmt, i + 1, data, coltypes[i], convert);
01561             if (rc != SQLITE_OK) {
01562                 goto inserr;
01563             }
01564         }
01565         rc = sqlite3_step(stmt);
01566         if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
01567             if ((rc != SQLITE_MISMATCH) && (rc != SQLITE_CONSTRAINT)) {
01568 inserr:
01569                 sqlite3_result_error(ctx, "insert failed", -1);
01570                 goto cleanup;
01571             }
01572         } else {
01573             row1++;
01574         }
01575         sqlite3_reset(stmt);
01576     }
01577     sqlite3_result_int(ctx, row1);
01578     goto cleanup;
01579 }
01580 
01587 #ifndef STANDALONE
01588 static
01589 #endif
01590 int
01591 csv_vtab_init(sqlite3 *db)
01592 {
01593     sqlite3_create_function(db, "import_csv", -1, SQLITE_UTF8,
01594                             (void *) db, csv_import_func, 0, 0);
01595     return sqlite3_create_module(db, "csvtable", &csv_vtab_mod, 0);
01596 }
01597 
01598 #ifndef STANDALONE
01599 
01608 int
01609 sqlite3_extension_init(sqlite3 *db, char **errmsg,
01610                        const sqlite3_api_routines *api)
01611 {
01612     SQLITE_EXTENSION_INIT2(api);
01613     return csv_vtab_init(db);
01614 }
01615 
01616 #endif

Generated on 1 Dec 2016 by doxygen.
Contact: chw@ch-werner.de