diff options
Diffstat (limited to 'src/fileio.c')
-rw-r--r-- | src/fileio.c | 8750 |
1 files changed, 8750 insertions, 0 deletions
diff --git a/src/fileio.c b/src/fileio.c new file mode 100644 index 0000000000..22339bf8e5 --- /dev/null +++ b/src/fileio.c @@ -0,0 +1,8750 @@ +/* vi:set ts=8 sts=4 sw=4: + * + * VIM - Vi IMproved by Bram Moolenaar + * + * Do ":help uganda" in Vim to read copying and usage conditions. + * Do ":help credits" in Vim to see a list of people who contributed. + * See README.txt for an overview of the Vim source code. + */ + +/* + * fileio.c: read from and write to a file + */ + +#if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64) +# include <io.h> /* for lseek(), must be before vim.h */ +#endif + +#if defined __EMX__ +# include <io.h> /* for mktemp(), CJW 1997-12-03 */ +#endif + +#include "vim.h" + +#ifdef HAVE_FCNTL_H +# include <fcntl.h> +#endif + +#ifdef __TANDEM +# include <limits.h> /* for SSIZE_MAX */ +#endif + +#if defined(HAVE_UTIME) && defined(HAVE_UTIME_H) +# include <utime.h> /* for struct utimbuf */ +#endif + +#define BUFSIZE 8192 /* size of normal write buffer */ +#define SMBUFSIZE 256 /* size of emergency write buffer */ + +#ifdef FEAT_CRYPT +# define CRYPT_MAGIC "VimCrypt~01!" /* "01" is the version nr */ +# define CRYPT_MAGIC_LEN 12 /* must be multiple of 4! */ +#endif + +/* Is there any system that doesn't have access()? */ +#ifndef MACOS_CLASSIC /* Not available on MacOS 9 */ +# define USE_MCH_ACCESS +#endif + +#ifdef FEAT_MBYTE +static char_u *next_fenc __ARGS((char_u **pp)); +# ifdef FEAT_EVAL +static char_u *readfile_charconvert __ARGS((char_u *fname, char_u *fenc, int *fdp)); +# endif +#endif +#ifdef FEAT_VIMINFO +static void check_marks_read __ARGS((void)); +#endif +#ifdef FEAT_CRYPT +static char_u *check_for_cryptkey __ARGS((char_u *cryptkey, char_u *ptr, long *sizep, long *filesizep, int newfile)); +#endif +#ifdef UNIX +static void set_file_time __ARGS((char_u *fname, time_t atime, time_t mtime)); +#endif +static void msg_add_fname __ARGS((buf_T *, char_u *)); +static int msg_add_fileformat __ARGS((int eol_type)); +static void msg_add_lines __ARGS((int, long, long)); +static void msg_add_eol __ARGS((void)); +static int check_mtime __ARGS((buf_T *buf, struct stat *s)); +static int time_differs __ARGS((long t1, long t2)); +#ifdef FEAT_AUTOCMD +static int apply_autocmds_exarg __ARGS((EVENT_T event, char_u *fname, char_u *fname_io, int force, buf_T *buf, exarg_T *eap)); +#endif + +#if defined(FEAT_CRYPT) || defined(FEAT_MBYTE) +# define HAS_BW_FLAGS +# define FIO_LATIN1 0x01 /* convert Latin1 */ +# define FIO_UTF8 0x02 /* convert UTF-8 */ +# define FIO_UCS2 0x04 /* convert UCS-2 */ +# define FIO_UCS4 0x08 /* convert UCS-4 */ +# define FIO_UTF16 0x10 /* convert UTF-16 */ +# ifdef WIN3264 +# define FIO_CODEPAGE 0x20 /* convert MS-Windows codepage */ +# define FIO_PUT_CP(x) (((x) & 0xffff) << 16) /* put codepage in top word */ +# define FIO_GET_CP(x) (((x)>>16) & 0xffff) /* get codepage from top word */ +# endif +# ifdef MACOS_X +# define FIO_MACROMAN 0x20 /* convert MacRoman */ +# endif +# define FIO_ENDIAN_L 0x80 /* little endian */ +# define FIO_ENCRYPTED 0x1000 /* encrypt written bytes */ +# define FIO_NOCONVERT 0x2000 /* skip encoding conversion */ +# define FIO_UCSBOM 0x4000 /* check for BOM at start of file */ +# define FIO_ALL -1 /* allow all formats */ +#endif + +/* When converting, a read() or write() may leave some bytes to be converted + * for the next call. The value is guessed... */ +#define CONV_RESTLEN 30 + +/* We have to guess how much a sequence of bytes may expand when converting + * with iconv() to be able to allocate a buffer. */ +#define ICONV_MULT 8 + +/* + * Structure to pass arguments from buf_write() to buf_write_bytes(). + */ +struct bw_info +{ + int bw_fd; /* file descriptor */ + char_u *bw_buf; /* buffer with data to be written */ + int bw_len; /* lenght of data */ +#ifdef HAS_BW_FLAGS + int bw_flags; /* FIO_ flags */ +#endif +#ifdef FEAT_MBYTE + char_u bw_rest[CONV_RESTLEN]; /* not converted bytes */ + int bw_restlen; /* nr of bytes in bw_rest[] */ + int bw_first; /* first write call */ + char_u *bw_conv_buf; /* buffer for writing converted chars */ + int bw_conv_buflen; /* size of bw_conv_buf */ + int bw_conv_error; /* set for conversion error */ +# ifdef USE_ICONV + iconv_t bw_iconv_fd; /* descriptor for iconv() or -1 */ +# endif +#endif +}; + +static int buf_write_bytes __ARGS((struct bw_info *ip)); + +#ifdef FEAT_MBYTE +static int ucs2bytes __ARGS((unsigned c, char_u **pp, int flags)); +static int same_encoding __ARGS((char_u *a, char_u *b)); +static int get_fio_flags __ARGS((char_u *ptr)); +static char_u *check_for_bom __ARGS((char_u *p, long size, int *lenp, int flags)); +static int make_bom __ARGS((char_u *buf, char_u *name)); +# ifdef WIN3264 +static int get_win_fio_flags __ARGS((char_u *ptr)); +# endif +# ifdef MACOS_X +static int get_mac_fio_flags __ARGS((char_u *ptr)); +# endif +#endif +static int move_lines __ARGS((buf_T *frombuf, buf_T *tobuf)); + +static linenr_T write_no_eol_lnum = 0; /* non-zero lnum when last line of + next binary write should not have + an end-of-line */ + + void +filemess(buf, name, s, attr) + buf_T *buf; + char_u *name; + char_u *s; + int attr; +{ + int msg_scroll_save; + + if (msg_silent != 0) + return; + msg_add_fname(buf, name); /* put file name in IObuff with quotes */ + /* If it's extremely long, truncate it. */ + if (STRLEN(IObuff) > IOSIZE - 80) + IObuff[IOSIZE - 80] = NUL; + STRCAT(IObuff, s); + /* + * For the first message may have to start a new line. + * For further ones overwrite the previous one, reset msg_scroll before + * calling filemess(). + */ + msg_scroll_save = msg_scroll; + if (shortmess(SHM_OVERALL) && !exiting && p_verbose == 0) + msg_scroll = FALSE; + if (!msg_scroll) /* wait a bit when overwriting an error msg */ + check_for_delay(FALSE); + msg_start(); + msg_scroll = msg_scroll_save; + msg_scrolled_ign = TRUE; + /* may truncate the message to avoid a hit-return prompt */ + msg_outtrans_attr(msg_may_trunc(FALSE, IObuff), attr); + msg_clr_eos(); + out_flush(); + msg_scrolled_ign = FALSE; +} + +/* + * Read lines from file "fname" into the buffer after line "from". + * + * 1. We allocate blocks with lalloc, as big as possible. + * 2. Each block is filled with characters from the file with a single read(). + * 3. The lines are inserted in the buffer with ml_append(). + * + * (caller must check that fname != NULL, unless READ_STDIN is used) + * + * "lines_to_skip" is the number of lines that must be skipped + * "lines_to_read" is the number of lines that are appended + * When not recovering lines_to_skip is 0 and lines_to_read MAXLNUM. + * + * flags: + * READ_NEW starting to edit a new buffer + * READ_FILTER reading filter output + * READ_STDIN read from stdin instead of a file + * READ_BUFFER read from curbuf instead of a file (converting after reading + * stdin) + * READ_DUMMY read into a dummy buffer (to check if file contents changed) + * + * return FAIL for failure, OK otherwise + */ + int +readfile(fname, sfname, from, lines_to_skip, lines_to_read, eap, flags) + char_u *fname; + char_u *sfname; + linenr_T from; + linenr_T lines_to_skip; + linenr_T lines_to_read; + exarg_T *eap; /* can be NULL! */ + int flags; +{ + int fd = 0; + int newfile = (flags & READ_NEW); + int check_readonly; + int filtering = (flags & READ_FILTER); + int read_stdin = (flags & READ_STDIN); + int read_buffer = (flags & READ_BUFFER); + linenr_T read_buf_lnum = 1; /* next line to read from curbuf */ + colnr_T read_buf_col = 0; /* next char to read from this line */ + char_u c; + linenr_T lnum = from; + char_u *ptr = NULL; /* pointer into read buffer */ + char_u *buffer = NULL; /* read buffer */ + char_u *new_buffer = NULL; /* init to shut up gcc */ + char_u *line_start = NULL; /* init to shut up gcc */ + int wasempty; /* buffer was empty before reading */ + colnr_T len; + long size = 0; + char_u *p; + long filesize = 0; + int skip_read = FALSE; +#ifdef FEAT_CRYPT + char_u *cryptkey = NULL; +#endif + int split = 0; /* number of split lines */ +#define UNKNOWN 0x0fffffff /* file size is unknown */ + linenr_T linecnt; + int error = FALSE; /* errors encountered */ + int ff_error = EOL_UNKNOWN; /* file format with errors */ + long linerest = 0; /* remaining chars in line */ +#ifdef UNIX + int perm = 0; + int swap_mode = -1; /* protection bits for swap file */ +#else + int perm; +#endif + int fileformat = 0; /* end-of-line format */ + int keep_fileformat = FALSE; + struct stat st; + int file_readonly; + linenr_T skip_count = 0; + linenr_T read_count = 0; + int msg_save = msg_scroll; + linenr_T read_no_eol_lnum = 0; /* non-zero lnum when last line of + * last read was missing the eol */ + int try_mac = (vim_strchr(p_ffs, 'm') != NULL); + int try_dos = (vim_strchr(p_ffs, 'd') != NULL); + int try_unix = (vim_strchr(p_ffs, 'x') != NULL); + int file_rewind = FALSE; +#ifdef FEAT_MBYTE + int can_retry; + int conv_error = FALSE; /* conversion error detected */ + int keep_dest_enc = FALSE; /* don't retry when char doesn't fit + in destination encoding */ + linenr_T illegal_byte = 0; /* line nr with illegal byte */ + char_u *tmpname = NULL; /* name of 'charconvert' output file */ + int fio_flags = 0; + char_u *fenc; /* fileencoding to use */ + int fenc_alloced; /* fenc_next is in allocated memory */ + char_u *fenc_next = NULL; /* next item in 'fencs' or NULL */ + int advance_fenc = FALSE; + long real_size = 0; +# ifdef USE_ICONV + iconv_t iconv_fd = (iconv_t)-1; /* descriptor for iconv() or -1 */ +# ifdef FEAT_EVAL + int did_iconv = FALSE; /* TRUE when iconv() failed and trying + 'charconvert' next */ +# endif +# endif + int converted = FALSE; /* TRUE if conversion done */ + int notconverted = FALSE; /* TRUE if conversion wanted but it + wasn't possible */ + char_u conv_rest[CONV_RESTLEN]; + int conv_restlen = 0; /* nr of bytes in conv_rest[] */ +#endif + +#ifdef FEAT_AUTOCMD + write_no_eol_lnum = 0; /* in case it was set by the previous read */ +#endif + + /* + * If there is no file name yet, use the one for the read file. + * BF_NOTEDITED is set to reflect this. + * Don't do this for a read from a filter. + * Only do this when 'cpoptions' contains the 'f' flag. + */ + if (curbuf->b_ffname == NULL + && !filtering + && fname != NULL + && vim_strchr(p_cpo, CPO_FNAMER) != NULL + && !(flags & READ_DUMMY)) + { + if (setfname(curbuf, fname, sfname, FALSE) == OK) + curbuf->b_flags |= BF_NOTEDITED; + } + + /* + * For Unix: Use the short file name whenever possible. + * Avoids problems with networks and when directory names are changed. + * Don't do this for MS-DOS, a "cd" in a sub-shell may have moved us to + * another directory, which we don't detect. + */ + if (sfname == NULL) + sfname = fname; +#if defined(UNIX) || defined(__EMX__) + fname = sfname; +#endif + +#ifdef FEAT_AUTOCMD + /* + * The BufReadCmd and FileReadCmd events intercept the reading process by + * executing the associated commands instead. + */ + if (!filtering && !read_stdin && !read_buffer) + { + pos_T pos; + + pos = curbuf->b_op_start; + + /* Set '[ mark to the line above where the lines go (line 1 if zero). */ + curbuf->b_op_start.lnum = ((from == 0) ? 1 : from); + curbuf->b_op_start.col = 0; + + if (newfile) + { + if (apply_autocmds_exarg(EVENT_BUFREADCMD, NULL, sfname, + FALSE, curbuf, eap)) +#ifdef FEAT_EVAL + return aborting() ? FAIL : OK; +#else + return OK; +#endif + } + else if (apply_autocmds_exarg(EVENT_FILEREADCMD, sfname, sfname, + FALSE, NULL, eap)) +#ifdef FEAT_EVAL + return aborting() ? FAIL : OK; +#else + return OK; +#endif + + curbuf->b_op_start = pos; + } +#endif + + if ((shortmess(SHM_OVER) || curbuf->b_help) && p_verbose == 0) + msg_scroll = FALSE; /* overwrite previous file message */ + else + msg_scroll = TRUE; /* don't overwrite previous file message */ + + /* + * If the name ends in a path separator, we can't open it. Check here, + * because reading the file may actually work, but then creating the swap + * file may destroy it! Reported on MS-DOS and Win 95. + * If the name is too long we might crash further on, quit here. + */ + if (fname != NULL + && *fname != NUL + && (vim_ispathsep(*(fname + STRLEN(fname) - 1)) + || STRLEN(fname) >= MAXPATHL)) + { + filemess(curbuf, fname, (char_u *)_("Illegal file name"), 0); + msg_end(); + msg_scroll = msg_save; + return FAIL; + } + +#ifdef UNIX + /* + * On Unix it is possible to read a directory, so we have to + * check for it before the mch_open(). + */ + if (!read_stdin && !read_buffer) + { + perm = mch_getperm(fname); + if (perm >= 0 && !S_ISREG(perm) /* not a regular file ... */ +# ifdef S_ISFIFO + && !S_ISFIFO(perm) /* ... or fifo */ +# endif +# ifdef S_ISSOCK + && !S_ISSOCK(perm) /* ... or socket */ +# endif + ) + { + if (S_ISDIR(perm)) + filemess(curbuf, fname, (char_u *)_("is a directory"), 0); + else + filemess(curbuf, fname, (char_u *)_("is not a file"), 0); + msg_end(); + msg_scroll = msg_save; + return FAIL; + } + } +#endif + + /* set default 'fileformat' */ + if (newfile) + { + if (eap != NULL && eap->force_ff != 0) + set_fileformat(get_fileformat_force(curbuf, eap), OPT_LOCAL); + else if (*p_ffs != NUL) + set_fileformat(default_fileformat(), OPT_LOCAL); + } + + /* set or reset 'binary' */ + if (eap != NULL && eap->force_bin != 0) + { + int oldval = curbuf->b_p_bin; + + curbuf->b_p_bin = (eap->force_bin == FORCE_BIN); + set_options_bin(oldval, curbuf->b_p_bin, OPT_LOCAL); + } + + /* + * When opening a new file we take the readonly flag from the file. + * Default is r/w, can be set to r/o below. + * Don't reset it when in readonly mode + * Only set/reset b_p_ro when BF_CHECK_RO is set. + */ + check_readonly = (newfile && (curbuf->b_flags & BF_CHECK_RO)); + if (check_readonly && !readonlymode) /* default: set file not readonly */ + curbuf->b_p_ro = FALSE; + + if (newfile && !read_stdin && !read_buffer) + { + /* Remember time of file. + * For RISCOS, also remember the filetype. + */ + if (mch_stat((char *)fname, &st) >= 0) + { + buf_store_time(curbuf, &st, fname); + curbuf->b_mtime_read = curbuf->b_mtime; + +#if defined(RISCOS) && defined(FEAT_OSFILETYPE) + /* Read the filetype into the buffer local filetype option. */ + mch_read_filetype(fname); +#endif +#ifdef UNIX + /* + * Use the protection bits of the original file for the swap file. + * This makes it possible for others to read the name of the + * edited file from the swapfile, but only if they can read the + * edited file. + * Remove the "write" and "execute" bits for group and others + * (they must not write the swapfile). + * Add the "read" and "write" bits for the user, otherwise we may + * not be able to write to the file ourselves. + * Setting the bits is done below, after creating the swap file. + */ + swap_mode = (st.st_mode & 0644) | 0600; +#endif +#ifdef FEAT_CW_EDITOR + /* Get the FSSpec on MacOS + * TODO: Update it properly when the buffer name changes + */ + (void)GetFSSpecFromPath(curbuf->b_ffname, &curbuf->b_FSSpec); +#endif +#ifdef VMS + curbuf->b_fab_rfm = st.st_fab_rfm; +#endif + } + else + { + curbuf->b_mtime = 0; + curbuf->b_mtime_read = 0; + curbuf->b_orig_size = 0; + curbuf->b_orig_mode = 0; + } + + /* Reset the "new file" flag. It will be set again below when the + * file doesn't exist. */ + curbuf->b_flags &= ~(BF_NEW | BF_NEW_W); + } + +/* + * for UNIX: check readonly with perm and mch_access() + * for RISCOS: same as Unix, otherwise file gets re-datestamped! + * for MSDOS and Amiga: check readonly by trying to open the file for writing + */ + file_readonly = FALSE; + if (read_stdin) + { +#if defined(MSDOS) || defined(MSWIN) || defined(OS2) + /* Force binary I/O on stdin to avoid CR-LF -> LF conversion. */ + setmode(0, O_BINARY); +#endif + } + else if (!read_buffer) + { +#ifdef USE_MCH_ACCESS + if ( +# ifdef UNIX + !(perm & 0222) || +# endif + mch_access((char *)fname, W_OK)) + file_readonly = TRUE; + fd = mch_open((char *)fname, O_RDONLY | O_EXTRA, 0); +#else + if (!newfile + || readonlymode + || (fd = mch_open((char *)fname, O_RDWR | O_EXTRA, 0)) < 0) + { + file_readonly = TRUE; + /* try to open ro */ + fd = mch_open((char *)fname, O_RDONLY | O_EXTRA, 0); + } +#endif + } + + if (fd < 0) /* cannot open at all */ + { +#ifndef UNIX + int isdir_f; +#endif + msg_scroll = msg_save; +#ifndef UNIX + /* + * On MSDOS and Amiga we can't open a directory, check here. + */ + isdir_f = (mch_isdir(fname)); + perm = mch_getperm(fname); /* check if the file exists */ + if (isdir_f) + { + filemess(curbuf, sfname, (char_u *)_("is a directory"), 0); + curbuf->b_p_ro = TRUE; /* must use "w!" now */ + } + else +#endif + if (newfile) + { + if (perm < 0) + { + /* + * Set the 'new-file' flag, so that when the file has + * been created by someone else, a ":w" will complain. + */ + curbuf->b_flags |= BF_NEW; + + /* Create a swap file now, so that other Vims are warned + * that we are editing this file. Don't do this for a + * "nofile" or "nowrite" buffer type. */ +#ifdef FEAT_QUICKFIX + if (!bt_dontwrite(curbuf)) +#endif + check_need_swap(newfile); + filemess(curbuf, sfname, (char_u *)_("[New File]"), 0); +#ifdef FEAT_VIMINFO + /* Even though this is a new file, it might have been + * edited before and deleted. Get the old marks. */ + check_marks_read(); +#endif +#ifdef FEAT_MBYTE + if (eap != NULL && eap->force_enc != 0) + { + /* set forced 'fileencoding' */ + fenc = enc_canonize(eap->cmd + eap->force_enc); + if (fenc != NULL) + set_string_option_direct((char_u *)"fenc", -1, + fenc, OPT_FREE|OPT_LOCAL); + vim_free(fenc); + } +#endif +#ifdef FEAT_AUTOCMD + apply_autocmds_exarg(EVENT_BUFNEWFILE, sfname, sfname, + FALSE, curbuf, eap); +#endif + /* remember the current fileformat */ + save_file_ff(curbuf); + +#if defined(FEAT_AUTOCMD) && defined(FEAT_EVAL) + if (aborting()) /* autocmds may abort script processing */ + return FAIL; +#endif + return OK; /* a new file is not an error */ + } + else + { + filemess(curbuf, sfname, + (char_u *)_("[Permission Denied]"), 0); + curbuf->b_p_ro = TRUE; /* must use "w!" now */ + } + } + + return FAIL; + } + + /* + * Only set the 'ro' flag for readonly files the first time they are + * loaded. Help files always get readonly mode + */ + if ((check_readonly && file_readonly) || curbuf->b_help) + curbuf->b_p_ro = TRUE; + + if (newfile) + { + curbuf->b_p_eol = TRUE; + curbuf->b_start_eol = TRUE; +#ifdef FEAT_MBYTE + curbuf->b_p_bomb = FALSE; +#endif + } + + /* Create a swap file now, so that other Vims are warned that we are + * editing this file. + * Don't do this for a "nofile" or "nowrite" buffer type. */ +#ifdef FEAT_QUICKFIX + if (!bt_dontwrite(curbuf)) +#endif + { + check_need_swap(newfile); +#ifdef UNIX + /* Set swap file protection bits after creating it. */ + if (swap_mode > 0 && curbuf->b_ml.ml_mfp->mf_fname != NULL) + (void)mch_setperm(curbuf->b_ml.ml_mfp->mf_fname, (long)swap_mode); +#endif + } + +#if defined(FEAT_GUI_DIALOG) || defined(FEAT_CON_DIALOG) + /* If "Quit" selected at ATTENTION dialog, don't load the file */ + if (swap_exists_action == SEA_QUIT) + { + if (!read_buffer && !read_stdin) + close(fd); + return FAIL; + } +#endif + + ++no_wait_return; /* don't wait for return yet */ + + /* + * Set '[ mark to the line above where the lines go (line 1 if zero). + */ + curbuf->b_op_start.lnum = ((from == 0) ? 1 : from); + curbuf->b_op_start.col = 0; + +#ifdef FEAT_AUTOCMD + if (!read_buffer) + { + int m = msg_scroll; + int n = msg_scrolled; + buf_T *old_curbuf = curbuf; + + /* + * The file must be closed again, the autocommands may want to change + * the file before reading it. + */ + if (!read_stdin) + close(fd); /* ignore errors */ + + /* + * The output from the autocommands should not overwrite anything and + * should not be overwritten: Set msg_scroll, restore its value if no + * output was done. + */ + msg_scroll = TRUE; + if (filtering) + apply_autocmds_exarg(EVENT_FILTERREADPRE, NULL, sfname, + FALSE, curbuf, eap); + else if (read_stdin) + apply_autocmds_exarg(EVENT_STDINREADPRE, NULL, sfname, + FALSE, curbuf, eap); + else if (newfile) + apply_autocmds_exarg(EVENT_BUFREADPRE, NULL, sfname, + FALSE, curbuf, eap); + else + apply_autocmds_exarg(EVENT_FILEREADPRE, sfname, sfname, + FALSE, NULL, eap); + if (msg_scrolled == n) + msg_scroll = m; + +#ifdef FEAT_EVAL + if (aborting()) /* autocmds may abort script processing */ + { + --no_wait_return; + msg_scroll = msg_save; + curbuf->b_p_ro = TRUE; /* must use "w!" now */ + return FAIL; + } +#endif + /* + * Don't allow the autocommands to change the current buffer. + * Try to re-open the file. + */ + if (!read_stdin && (curbuf != old_curbuf + || (fd = mch_open((char *)fname, O_RDONLY | O_EXTRA, 0)) < 0)) + { + --no_wait_return; + msg_scroll = msg_save; + if (fd < 0) + EMSG(_("E200: *ReadPre autocommands made the file unreadable")); + else + EMSG(_("E201: *ReadPre autocommands must not change current buffer")); + curbuf->b_p_ro = TRUE; /* must use "w!" now */ + return FAIL; + } + } +#endif /* FEAT_AUTOCMD */ + + /* Autocommands may add lines to the file, need to check if it is empty */ + wasempty = (curbuf->b_ml.ml_flags & ML_EMPTY); + + if (!recoverymode && !filtering && !(flags & READ_DUMMY)) + { + /* + * Show the user that we are busy reading the input. Sometimes this + * may take a while. When reading from stdin another program may + * still be running, don't move the cursor to the last line, unless + * always using the GUI. + */ + if (read_stdin) + { +#ifndef ALWAYS_USE_GUI + mch_msg(_("Vim: Reading from stdin...\n")); +#endif +#ifdef FEAT_GUI + /* Also write a message in the GUI window, if there is one. */ + if (gui.in_use && !gui.dying && !gui.starting) + { + p = (char_u *)_("Reading from stdin..."); + gui_write(p, (int)STRLEN(p)); + } +#endif + } + else if (!read_buffer) + filemess(curbuf, sfname, (char_u *)"", 0); + } + + msg_scroll = FALSE; /* overwrite the file message */ + + /* + * Set linecnt now, before the "retry" caused by a wrong guess for + * fileformat, and after the autocommands, which may change them. + */ + linecnt = curbuf->b_ml.ml_line_count; + +#ifdef FEAT_MBYTE + /* + * Decide which 'encoding' to use first. + */ + if (eap != NULL && eap->force_enc != 0) + { + fenc = enc_canonize(eap->cmd + eap->force_enc); + fenc_alloced = TRUE; + } + else if (curbuf->b_p_bin) + { + fenc = (char_u *)""; /* binary: don't convert */ + fenc_alloced = FALSE; + } + else if (curbuf->b_help) + { + char_u firstline[80]; + + /* Help files are either utf-8 or latin1. Try utf-8 first, if this + * fails it must be latin1. + * Always do this when 'encoding' is "utf-8". Otherwise only do + * this when needed to avoid [converted] remarks all the time. + * It is needed when the first line contains non-ASCII characters. + * That is only in *.??x files. */ + fenc = (char_u *)"latin1"; + c = enc_utf8; + if (!c && !read_stdin && TOLOWER_ASC(fname[STRLEN(fname) - 1]) == 'x') + { + /* Read the first line (and a bit more). Immediately rewind to + * the start of the file. If the read() fails "len" is -1. */ + len = vim_read(fd, firstline, 80); + lseek(fd, (off_t)0L, SEEK_SET); + for (p = firstline; p < firstline + len; ++p) + if (*p >= 0x80) + { + c = TRUE; + break; + } + } + + if (c) + { + fenc_next = fenc; + fenc = (char_u *)"utf-8"; + + /* When the file is utf-8 but a character doesn't fit in + * 'encoding' don't retry. In help text editing utf-8 bytes + * doesn't make sense. */ + keep_dest_enc = TRUE; + } + fenc_alloced = FALSE; + } + else if (*p_fencs == NUL) + { + fenc = curbuf->b_p_fenc; /* use format from buffer */ + fenc_alloced = FALSE; + } + else + { + fenc_next = p_fencs; /* try items in 'fileencodings' */ + fenc = next_fenc(&fenc_next); + fenc_alloced = TRUE; + } +#endif + + /* + * Jump back here to retry reading the file in different ways. + * Reasons to retry: + * - encoding conversion failed: try another one from "fenc_next" + * - BOM detected and fenc was set, need to setup conversion + * - "fileformat" check failed: try another + * + * Variables set for special retry actions: + * "file_rewind" Rewind the file to start reading it again. + * "advance_fenc" Advance "fenc" using "fenc_next". + * "skip_read" Re-use already read bytes (BOM detected). + * "did_iconv" iconv() conversion failed, try 'charconvert'. + * "keep_fileformat" Don't reset "fileformat". + * + * Other status indicators: + * "tmpname" When != NULL did conversion with 'charconvert'. + * Output file has to be deleted afterwards. + * "iconv_fd" When != -1 did conversion with iconv(). + */ +retry: + + if (file_rewind) + { + if (read_buffer) + { + read_buf_lnum = 1; + read_buf_col = 0; + } + else if (read_stdin || lseek(fd, (off_t)0L, SEEK_SET) != 0) + { + /* Can't rewind the file, give up. */ + error = TRUE; + goto failed; + } + /* Delete the previously read lines. */ + while (lnum > from) + ml_delete(lnum--, FALSE); + file_rewind = FALSE; +#ifdef FEAT_MBYTE + if (newfile) + curbuf->b_p_bomb = FALSE; + conv_error = FALSE; +#endif + } + + /* + * When retrying with another "fenc" and the first time "fileformat" + * will be reset. + */ + if (keep_fileformat) + keep_fileformat = FALSE; + else + { + if (eap != NULL && eap->force_ff != 0) + fileformat = get_fileformat_force(curbuf, eap); + else if (curbuf->b_p_bin) + fileformat = EOL_UNIX; /* binary: use Unix format */ + else if (*p_ffs == NUL) + fileformat = get_fileformat(curbuf);/* use format from buffer */ + else + fileformat = EOL_UNKNOWN; /* detect from file */ + } + +#ifdef FEAT_MBYTE +# ifdef USE_ICONV + if (iconv_fd != (iconv_t)-1) + { + /* aborted conversion with iconv(), close the descriptor */ + iconv_close(iconv_fd); + iconv_fd = (iconv_t)-1; + } +# endif + + if (advance_fenc) + { + /* + * Try the next entry in 'fileencodings'. + */ + advance_fenc = FALSE; + + if (eap != NULL && eap->force_enc != 0) + { + /* Conversion given with "++cc=" wasn't possible, read + * without conversion. */ + notconverted = TRUE; + conv_error = FALSE; + if (fenc_alloced) + vim_free(fenc); + fenc = (char_u *)""; + fenc_alloced = FALSE; + } + else + { + if (fenc_alloced) + vim_free(fenc); + if (fenc_next != NULL) + { + fenc = next_fenc(&fenc_next); + fenc_alloced = (fenc_next != NULL); + } + else + { + fenc = (char_u *)""; + fenc_alloced = FALSE; + } + } + if (tmpname != NULL) + { + mch_remove(tmpname); /* delete converted file */ + vim_free(tmpname); + tmpname = NULL; + } + } + + /* + * Conversion is required when the encoding of the file is different + * from 'encoding' or 'encoding' is UTF-16, UCS-2 or UCS-4 (requires + * conversion to UTF-8). + */ + fio_flags = 0; + converted = (*fenc != NUL && !same_encoding(p_enc, fenc)); + if (converted || enc_unicode != 0) + { + + /* "ucs-bom" means we need to check the first bytes of the file + * for a BOM. */ + if (STRCMP(fenc, ENC_UCSBOM) == 0) + fio_flags = FIO_UCSBOM; + + /* + * Check if UCS-2/4 or Latin1 to UTF-8 conversion needs to be + * done. This is handled below after read(). Prepare the + * fio_flags to avoid having to parse the string each time. + * Also check for Unicode to Latin1 conversion, because iconv() + * appears not to handle this correctly. This works just like + * conversion to UTF-8 except how the resulting character is put in + * the buffer. + */ + else if (enc_utf8 || STRCMP(p_enc, "latin1") == 0) + fio_flags = get_fio_flags(fenc); + +# ifdef WIN3264 + /* + * Conversion from an MS-Windows codepage to UTF-8 or another codepage + * is handled with MultiByteToWideChar(). + */ + if (fio_flags == 0) + fio_flags = get_win_fio_flags(fenc); +# endif + +# ifdef MACOS_X + /* Conversion from Apple MacRoman to latin1 or UTF-8 */ + if (fio_flags == 0) + fio_flags = get_mac_fio_flags(fenc); +# endif + +# ifdef USE_ICONV + /* + * Try using iconv() if we can't convert internally. + */ + if (fio_flags == 0 +# ifdef FEAT_EVAL + && !did_iconv +# endif + ) + iconv_fd = (iconv_t)my_iconv_open( + enc_utf8 ? (char_u *)"utf-8" : p_enc, fenc); +# endif + +# ifdef FEAT_EVAL + /* + * Use the 'charconvert' expression when conversion is required + * and we can't do it internally or with iconv(). + */ + if (fio_flags == 0 && !read_stdin && !read_buffer && *p_ccv != NUL +# ifdef USE_ICONV + && iconv_fd == (iconv_t)-1 +# endif + ) + { +# ifdef USE_ICONV + did_iconv = FALSE; +# endif + /* Skip conversion when it's already done (retry for wrong + * "fileformat"). */ + if (tmpname == NULL) + { + tmpname = readfile_charconvert(fname, fenc, &fd); + if (tmpname == NULL) + { + /* Conversion failed. Try another one. */ + advance_fenc = TRUE; + if (fd < 0) + { + /* Re-opening the original file failed! */ + EMSG(_("E202: Conversion made file unreadable!")); + error = TRUE; + goto failed; + } + goto retry; + } + } + } + else +# endif + { + if (fio_flags == 0 +# ifdef USE_ICONV + && iconv_fd == (iconv_t)-1 +# endif + ) + { + /* Conversion wanted but we can't. + * Try the next conversion in 'fileencodings' */ + advance_fenc = TRUE; + goto retry; + } + } + } + + /* Set can_retry when it's possible to rewind the file and try with + * another "fenc" value. It's FALSE when no other "fenc" to try, reading + * stdin or "fenc" was specified with "++enc=". */ + can_retry = (*fenc != NUL && !read_stdin + && (eap == NULL || eap->force_enc == 0)); +#endif + + if (!skip_read) + { + linerest = 0; + filesize = 0; + skip_count = lines_to_skip; + read_count = lines_to_read; +#ifdef FEAT_MBYTE + conv_restlen = 0; +#endif + } + + while (!error && !got_int) + { + /* + * We allocate as much space for the file as we can get, plus + * space for the old line plus room for one terminating NUL. + * The amount is limited by the fact that read() only can read + * upto max_unsigned characters (and other things). + */ +#if SIZEOF_INT <= 2 + if (linerest >= 0x7ff0) + { + ++split; + *ptr = NL; /* split line by inserting a NL */ + size = 1; + } + else +#endif + { + if (!skip_read) + { +#if SIZEOF_INT > 2 +# ifdef __TANDEM + size = SSIZE_MAX; /* use max I/O size, 52K */ +# else + size = 0x10000L; /* use buffer >= 64K */ +# endif +#else + size = 0x7ff0L - linerest; /* limit buffer to 32K */ +#endif + + for ( ; size >= 10; size = (long_u)size >> 1) + { + if ((new_buffer = lalloc((long_u)(size + linerest + 1), + FALSE)) != NULL) + break; + } + if (new_buffer == NULL) + { + do_outofmem_msg((long_u)(size * 2 + linerest + 1)); + error = TRUE; + break; + } + if (linerest) /* copy characters from the previous buffer */ + mch_memmove(new_buffer, ptr - linerest, (size_t)linerest); + vim_free(buffer); + buffer = new_buffer; + ptr = buffer + linerest; + line_start = buffer; + +#ifdef FEAT_MBYTE + /* May need room to translate into. + * For iconv() we don't really know the required space, use a + * factor ICONV_MULT. + * latin1 to utf-8: 1 byte becomes up to 2 bytes + * utf-16 to utf-8: 2 bytes become up to 3 bytes, 4 bytes + * become up to 4 bytes, size must be multiple of 2 + * ucs-2 to utf-8: 2 bytes become up to 3 bytes, size must be + * multiple of 2 + * ucs-4 to utf-8: 4 bytes become up to 6 bytes, size must be + * multiple of 4 */ + real_size = size; +# ifdef USE_ICONV + if (iconv_fd != (iconv_t)-1) + size = size / ICONV_MULT; + else +# endif + if (fio_flags & FIO_LATIN1) + size = size / 2; + else if (fio_flags & (FIO_UCS2 | FIO_UTF16)) + size = (size * 2 / 3) & ~1; + else if (fio_flags & FIO_UCS4) + size = (size * 2 / 3) & ~3; + else if (fio_flags == FIO_UCSBOM) + size = size / ICONV_MULT; /* worst case */ +# ifdef WIN3264 + else if (fio_flags & FIO_CODEPAGE) + size = size / ICONV_MULT; /* also worst case */ +# endif +# ifdef MACOS_X + else if (fio_flags & FIO_MACROMAN) + size = size / ICONV_MULT; /* also worst case */ +# endif +#endif + +#ifdef FEAT_MBYTE + if (conv_restlen > 0) + { + /* Insert unconverted bytes from previous line. */ + mch_memmove(ptr, conv_rest, conv_restlen); + ptr += conv_restlen; + size -= conv_restlen; + } +#endif + + if (read_buffer) + { + /* + * Read bytes from curbuf. Used for converting text read + * from stdin |