CMPSC 311, Introduction to Systems Programming

Unix System-Level I/O



Reading
Standards



Input / Output



Files
Each call to open(), read(), write() or close() involves a system call to the OS kernel.

Each operation on an open file needs to mention the file descriptor, but no other details about the file are required, since they are maintained in the Open File Table.



Open

#include <sys/types.h>  /* mode_t */
#include <sys/stat.h>   /* mode bits, S_xyz */
#include <fcntl.h>      /* open(), flag bits, O_xyz */

/* filename is only read by open(), so it is const-qualified (as in creat()). */
int open(const char *filename, int flags, mode_t mode);

/* returns file descriptor if successful or -1 if not */

Examples
int fd1 = open("foo", O_RDONLY, 0);
if (fd1 == -1)
  { something went wrong, deal with it }
int fd2 = open("bar", O_WRONLY | O_APPEND, 0);
if (fd2 == -1)
  { something went wrong, deal with it }



To create a new file, open for writing,

int creat(const char *filename, mode_t mode);

This is equivalent to

open(filename, O_WRONLY | O_CREAT | O_TRUNC, mode);


 
The umask value, maintained per process
Example (CS:APP, pp. 864-865)

/* Default file permissions are DEF_MODE & ~DEF_UMASK.
 *
 * Each expansion is wrapped in parentheses so that the macros can be
 * combined with other operators (such as the & and ~ above) without
 * the surrounding expression regrouping the individual S_xyz bits.
 */
#define DEF_MODE   (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)
#define DEF_UMASK  (S_IWGRP | S_IWOTH)

/* Set this process's umask, then create (or truncate) "foobar" for
 * writing, requesting DEF_MODE as its permissions.
 * NOTE(review): fd3 is not compared against -1 here, unlike the
 * earlier open() examples.
 */
umask(DEF_UMASK);
int fd3 = open("foobar",
O_CREAT | O_TRUNC | O_WRONLY, DEF_MODE);



Close

#include <unistd.h>

int close(int fd);

/* returns 0 if successful or -1 if not */




Read, Write (Seek later)

#include <unistd.h>
ssize_t read(int fd, void *buf, size_t n);

ssize_t write(int fd, const void *buf, size_t n);

See APUE Sec. 3.9 for a simple discussion of buffer size vs. runtime.

Where is the bug?

/* version 1 */

char c;


while (read(STDIN_FILENO, &c, 1) > 0)
  write(STDOUT_FILENO, &c, 1);

/* version 2 */

char A[100];


while (read(STDIN_FILENO, A, 100) > 0)
  write(STDOUT_FILENO, A, 100);

A "short count" is not an error.



A Robust I/O Package

This code is a revised version of the RIO package in CS:APP Sec. 10.4, which is in turn a rewrite of a similar package in Stevens, Unix Network Programming.  A similar pair of functions is described in APUE Sec. 14.8.

The goal is to deal with short counts in a convenient way.  The primary application is for network communication.

There are functions for unbuffered read and write operations, and for buffered read operations.
These functions have the same semantics as read() and write().  The buffered version gives you a hint about what the C Standard I/O library does.



In the include file,

/* unbuffered, read or write operations */

ssize_t rio_readn(int fd, void *usrbuf, size_t n);
ssize_t rio_writen(int fd, void *usrbuf, size_t n);

/* Returns
 *   number of bytes transferred if OK
 *   0 on EOF (rio_readn only)
 *   -1 on error
 * rio_readn returns a short count only on EOF.
 * rio_writen never returns a short count.
 * Calls to these functions can be interleaved arbitrarily on the
 *   same file descriptor.
 */


/* buffered, read operations */

#define RIO_BUFSIZE 8192     /* size, in bytes, of the internal buffer in rio_t */

/* State of one buffered reader: the descriptor it reads from, plus an
 * internal buffer holding bytes already fetched but not yet consumed.
 * Set up with rio_readinitb() before any buffered read.
 */
typedef struct {
  int rio_fd;                /* file descriptor the buffer is filled from */
  size_t rio_cnt;            /* number of unread bytes in rio_buf */
  char *rio_bufptr;          /* next unread byte in rio_buf */
  char rio_buf[RIO_BUFSIZE]; /* internal buffer */
} rio_t;

void rio_readinitb(rio_t *rp, int fd);

ssize_t rio_readnb(rio_t *rp, void *usrbuf, size_t n);
ssize_t rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen);

/* Returns
 *   number of bytes read if OK
 *   0 on EOF
 *   -1 on error
 * rio_readinitb is called once per open file descriptor.
 * rio_readnb returns a short count only on EOF.
 * rio_readlineb truncates long lines and terminates them properly.
 *   If truncated, the next call to rio_readlineb obtains more of
 *   the same line.
 * Calls to these functions can be interleaved arbitrarily on the
 *   same buffer.
 */


/* Don't mix calls to the buffered and unbuffered versions.
 */


/* Wrappers for the Rio package
 *
 * These functions check the return value, and exit if there is an
 *   error.
 */

ssize_t Rio_readn(int fd, void *usrbuf, size_t n);
void    Rio_writen(int fd, void *usrbuf, size_t n);
void    Rio_readinitb(rio_t *rp, int fd);
ssize_t Rio_readnb(rio_t *rp, void *usrbuf, size_t n);
ssize_t Rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen);





Example wrapper function and usage

/*
 * Rio_readn - error-checking wrapper around rio_readn()
 *
 * Forwards its arguments unchanged.  If rio_readn() reports -1, the
 * failure is handed to report_error_and_exit(); otherwise the byte
 * count (possibly 0 on EOF) is passed back to the caller.
 */
ssize_t Rio_readn(int fd, void *usrbuf, size_t n)
{
  ssize_t result = rio_readn(fd, usrbuf, n);

  if (result == -1) {
    report_error_and_exit("Rio_readn");
  }

  return result;
}



  int n;
  rio_t rio;
  char buf[MAXLINE];

  Rio_readinitb(&rio, STDIN_FILENO);

  while((n = Rio_readlineb(&rio, buf, MAXLINE)) != 0)
    Rio_writen(STDOUT_FILENO, buf, n);





In the source file,

/*
 * rio_readn - Robustly read n bytes (unbuffered)
 *
 * Keeps calling read() until n bytes have been stored in usrbuf, EOF
 * is reached, or read() fails for a reason other than EINTR.
 *
 * Returns the number of bytes stored (less than n only when EOF was
 * hit, 0 if EOF was hit immediately), or -1 on error with errno as
 * set by read().
 *
 * A request with n == 0 does nothing and returns 0; read() is not
 * called, so fd and usrbuf are not validated the way a direct
 * read(fd, usrbuf, 0) would validate them.
 *
 * Open question: on error, bytes stored by earlier iterations are
 * not reported to the caller.  Giving up entirely is right in some
 * cases, but perhaps not in all.
 */

ssize_t rio_readn(int fd, void *usrbuf, size_t n)
{
  char *dst = usrbuf;
  size_t total = 0;          /* bytes stored so far, 0 <= total <= n */

  while (total < n) {
    ssize_t got = read(fd, dst + total, n - total);

    if (got > 0) {
      total += (size_t) got; /* 0 < got <= n - total */
    } else if (got == 0) {
      break;                 /* EOF */
    } else if (errno != EINTR) {
      return -1;             /* real failure, errno set by read() */
    }
    /* otherwise: interrupted by a signal before any data, retry */
  }

  return (ssize_t) total;
}

Exercise.  Show that bufp + nleft is equal to &bufp[n], and explain why bufp + nleft is not equal to &usrbuf[n].



/*
 * rio_writen - Robustly write n bytes (unbuffered)
 *
 * Keeps calling write() until all n bytes of usrbuf have been
 * written, or write() fails for a reason other than EINTR.
 *
 * Returns n on success (never a short count), or -1 on error with
 * errno as set by write().
 *
 * Open question: on error, some bytes may already have been written
 * by earlier iterations.  Giving up entirely is right in some cases,
 * but perhaps not in all.
 */

ssize_t rio_writen(int fd, void *usrbuf, size_t n)
{
  const char *src = usrbuf;
  size_t sent = 0;           /* bytes written so far, 0 <= sent <= n */

  while (sent < n) {
    ssize_t put = write(fd, src + sent, n - sent);

    if (put < 0) {
      if (errno != EINTR)
        return -1;           /* real failure, errno set by write() */
      continue;              /* interrupted before writing, retry */
    }

    /* put == 0 is not an error; nothing is added and the loop retries */
    sent += (size_t) put;
  }

  return (ssize_t) n;
}




Reminder, from the include file,

#define RIO_BUFSIZE 8192     /* size, in bytes, of the internal buffer in rio_t */

/* State of one buffered reader: the descriptor it reads from, plus an
 * internal buffer holding bytes already fetched but not yet consumed.
 * Set up with rio_readinitb() before any buffered read.
 */
typedef struct {
  int rio_fd;                /* file descriptor the buffer is filled from */
  size_t rio_cnt;            /* number of unread bytes in rio_buf */
  char *rio_bufptr;          /* next unread byte in rio_buf */
  char rio_buf[RIO_BUFSIZE]; /* internal buffer */
} rio_t;

void rio_readinitb(rio_t *rp, int fd);


In the source file,

/*
 * rio_readinitb - Bind a read buffer to a descriptor and mark it empty
 *
 * After the call, rp reads from fd, holds no unread bytes, and its
 * read position is the start of the internal buffer.
 */

void rio_readinitb(rio_t *rp, int fd)
{
  rp->rio_bufptr = rp->rio_buf;  /* next byte to hand out: start of buffer */
  rp->rio_cnt = 0;               /* nothing buffered yet */
  rp->rio_fd = fd;
}




/*
 * rio_read - Buffered stand-in for the Unix read() function.
 *
 *   Hands the caller min(n, rio_cnt) bytes out of the internal buffer,
 *   where n is the number of bytes requested and rio_cnt is the number
 *   of unread bytes held in the buffer.  When the buffer is empty on
 *   entry, it is first refilled with one successful read() on rio_fd
 *   (retried if interrupted by a signal).
 *
 *   Returns the number of bytes copied, 0 on EOF, or -1 on a read()
 *   error (errno as set by read()).
 *
 *   Only rio_readinitb() and rio_read() modify the members of a rio_t.
 *
 *   rio_read() is not called with n == 0, but the case is tested up
 *   front anyway so that the buffer is not filled prematurely.  The
 *   test could be moved after the refill loop if a pre-fetch
 *   capability is wanted.
 */

static ssize_t rio_read(rio_t *rp, char *usrbuf, size_t n)
{
  if (n == 0)
    return 0;

  /* Refill only when every buffered byte has been consumed. */
  while (rp->rio_cnt == 0) {
    ssize_t got = read(rp->rio_fd, rp->rio_buf, sizeof(rp->rio_buf));

    if (got == 0)
      return 0;                      /* EOF */

    if (got > 0) {                   /* 0 < got <= sizeof(rp->rio_buf) */
      rp->rio_bufptr = rp->rio_buf;
      rp->rio_cnt = (size_t) got;
    } else if (errno != EINTR) {
      return -1;                     /* errno set by read(), give up */
    }
    /* otherwise: interrupted by a signal, no data was read, retry */
  }

  /* Here 0 < n and 0 < rp->rio_cnt; hand out the smaller of the two. */
  size_t take = (n < rp->rio_cnt) ? n : rp->rio_cnt;

  memcpy(usrbuf, rp->rio_bufptr, take);
  rp->rio_bufptr += take;
  rp->rio_cnt -= take;

  return (ssize_t) take;
}



/*
 * rio_readnb - Robustly read n bytes (buffered)
 *
 * Calls rio_read() repeatedly until n bytes have been stored in
 * usrbuf, EOF is reached, or an error occurs.
 *
 * Returns the number of bytes stored (less than n only on EOF, 0 if
 * EOF was hit immediately), or -1 on error.
 *
 * Same reasoning as rio_readn(), including the open question of
 * whether returning -1 is right when earlier iterations already
 * delivered some data.
 */

ssize_t rio_readnb(rio_t *rp, void *usrbuf, size_t n)
{
  char *dst = usrbuf;
  size_t total = 0;          /* bytes stored so far, 0 <= total <= n */

  while (total < n) {
    ssize_t got = rio_read(rp, dst + total, n - total);

    if (got > 0) {           /* 0 < got <= n - total */
      total += (size_t) got;
      continue;
    }
    if (got == 0)
      break;                 /* EOF */

    /* got < 0.  rio_read() itself retries read() on EINTR, so this
     * retry is not expected to trigger; it mirrors rio_readn().
     */
    if (errno != EINTR)
      return -1;             /* errno set by read(), give up */
  }

  return (ssize_t) total;
}




/*
 * rio_readlineb - Robustly read a text line (buffered)
 *
 * Reads bytes one at a time through rio_read() until a newline has
 * been stored, maxlen - 1 bytes have been stored, or EOF is reached.
 * The result is always '\0'-terminated; a stored newline is kept.
 *
 * Returns
 *   the number of bytes stored, not counting the terminating '\0'
 *   0 on EOF with no data read (usrbuf then holds an empty string)
 *   -1 on error
 *
 * The count is derived from the write position, so it is exact in
 * every case: newline found, line truncated at maxlen - 1 bytes, or
 * EOF after a partial line.  A truncated line is continued by the
 * next call.
 *
 * maxlen == 0 leaves no room even for the terminator; usrbuf is not
 * touched, errno is set to EINVAL and -1 is returned.
 * maxlen == 1 can only hold the terminator, so the call stores an
 * empty string and returns 0, which cannot be told apart from EOF;
 * callers should pass maxlen >= 2.
 *
 * On a read error the bytes stored so far are '\0'-terminated before
 * returning -1 (errno as set by read()).  Same open question as in
 * rio_readn(): is giving up entirely always right?
 */

ssize_t rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen)
{
  char *start = usrbuf;
  char *bufp = start;        /* next free position in usrbuf */

  if (maxlen == 0) {
    errno = EINVAL;
    return -1;
  }

  /* Keep one byte in reserve for the terminating '\0'. */
  while ((size_t) (bufp - start) < maxlen - 1) {
    ssize_t rc = rio_read(rp, bufp, 1);

    if (rc < 0) {
      *bufp = '\0';          /* leave a valid string behind */
      return -1;             /* errno set by read(), give up */
    }
    if (rc == 0)
      break;                 /* EOF, with or without earlier data */
    if (*bufp++ == '\n')     /* read success, rc == 1 */
      break;
  }
  *bufp = '\0';

  return (ssize_t) (bufp - start);
}





Last revised, 18 Feb. 2013