CMPSC 311,
Introduction to Systems Programming
File Sharing
File-Related Odds-and-Ends
Interprocess Communication
Reading
- CS:APP
- Sec. 10.5, Reading File Metadata
- Sec. 10.6, Sharing Files
- Sec. 10.7, I/O Redirection
- APUE, Sec. 3.10-12, 3.16, 15.1-3
File Sharing
Open files
- Descriptor table
- per process
- for each open file descriptor, a pointer to an entry in the
file table
- File table
- per system
- for each open file among all processes,
- current file position
- reference count (how many descriptor table entries
currently point to it)
- increment when a file is opened, decrement when closed
- if 0 then safe to delete this entry
- pointer to an entry in the v-node table
- V-node table
- per system
- information about the file
stat(), fstat() and lstat()
functions can retrieve much of this information
- Mac OS X
stat command, for example
- See CS:APP Sec. 10.6 for examples of these tables, with two
"normal" files in the same process, a file with two descriptors
in the same process, and a file shared between parent and child
after
fork().
File-Related
Odds-and-Ends
#include <unistd.h> // for stat, fstat, lstat
#include <sys/stat.h> // for struct stat
int fstat(int fildes, struct stat *buf);
int stat(const char *restrict path, struct stat *restrict buf);
int lstat(const char *restrict path, struct stat *restrict buf);
// Returns 0 if ok, -1 if not
Example, in file x.c,
run on Mac OS X
#include <stdio.h>
#include <unistd.h> // for stat, fstat, lstat
#include <sys/stat.h> // for struct stat
#include <string.h>
#include <errno.h>
/*
 * Print selected struct stat fields for every pathname given on the
 * command line.
 *
 * Each argument is passed to stat().  On failure the pathname and the
 * strerror() text for errno are printed; on success the fields are
 * printed one per line.  Always returns 0.
 */
int main(int argc, char *argv[])
{
    struct stat info;

    for (int i = 1; argv[i] != NULL; i++)
    {
        if (stat(argv[i], &info) == -1)
        {
            // report the pathname that failed, not always the first one
            printf("%s: %s\n", argv[i], strerror(errno));
        }
        else
        {
            printf("%s:\n", argv[i]);

            // The field types are system-defined integer types, so each
            // value is cast to the type its conversion specifier expects.
            //                                                        type
            printf(" st_dev %lld\n", (long long)info.st_dev);      // dev_t
            printf(" st_ino %llu\n", (unsigned long long)info.st_ino); // ino_t
            printf(" st_mode %o\n", (unsigned int)info.st_mode);   // mode_t
            printf(" st_nlink %llu\n", (unsigned long long)info.st_nlink); // nlink_t
            printf(" st_uid %lld\n", (long long)info.st_uid);      // uid_t
            printf(" st_gid %lld\n", (long long)info.st_gid);      // gid_t
            printf(" st_rdev %lld\n", (long long)info.st_rdev);    // dev_t

            // NOTE(review): %d does not match a 64-bit off_t; the sample
            // run shown after this program prints 0 for this line.  It
            // looks like it is kept to contrast with the %lld line that
            // follows -- confirm before removing.
            printf(" st_size %d\n", info.st_size);                 // off_t (64 bits)
            printf(" st_size %lld\n", (long long)info.st_size);    // off_t (64 bits)
        }
    }
    return 0;
}
% cc -std=c99 x.c
% ls -li x.c
2409618 -rw------- 1 dheller wheel 1070 Mar 15
11:42 x.c
% ./a.out x.c
x.c:
st_dev 234881026
st_ino 2409618
st_mode 100600
st_nlink 1
st_uid 8364
st_gid 0
st_rdev 0
st_size 0
st_size 1070
Posix Standard, 2004 (lightly edited)
struct stat {
    dev_t     st_dev;     Device ID of device containing file.
    ino_t     st_ino;     File serial number.
    mode_t    st_mode;    Mode of file.
    nlink_t   st_nlink;   Number of hard links to the file.
    uid_t     st_uid;     User ID of file.
    gid_t     st_gid;     Group ID of file.
    dev_t     st_rdev;    Device ID (if file is character or block special).
    off_t     st_size;    For regular files, the file size in bytes.
                          For symbolic links, the length in bytes of the
                          pathname contained in the symbolic link.
                          For a shared memory object, the length in bytes.
                          For a typed memory object, the length in bytes.
                          For other file types, the use of this field is
                          unspecified.
    time_t    st_atime;   Time of last access.
    time_t    st_mtime;   Time of last data modification.
    time_t    st_ctime;   Time of last status change.
    blksize_t st_blksize; A file system-specific preferred I/O block
                          size for this object. In some file system
                          types, this may vary from file to file.
    blkcnt_t  st_blocks;  Number of blocks allocated for this object.
};
Posix Standard, 2008 (lightly edited)
struct stat {
    dev_t     st_dev;     Device ID of device containing file.
    ino_t     st_ino;     File serial number.
    mode_t    st_mode;    Mode of file.
    nlink_t   st_nlink;   Number of hard links to the file.
    uid_t     st_uid;     User ID of file.
    gid_t     st_gid;     Group ID of file.
    dev_t     st_rdev;    Device ID (if file is character or block special).
    off_t     st_size;    For regular files, the file size in bytes.
                          For symbolic links, the length in bytes of the
                          pathname contained in the symbolic link.
                          For a shared memory object, the length in bytes.
                          For a typed memory object, the length in bytes.
                          For other file types, the use of this field is
                          unspecified.
    struct timespec st_atim;  Last data access timestamp.
    struct timespec st_mtim;  Last data modification timestamp.
    struct timespec st_ctim;  Last file status change timestamp.
    blksize_t st_blksize; A file system-specific preferred I/O block
                          size for this object. In some file system
                          types, this may vary from file to file.
    blkcnt_t  st_blocks;  Number of blocks allocated for this object.
};
Solaris (from the stat(2) man page, lightly edited)
/* struct stat as listed in the Solaris stat(2) man page (lightly edited). */
struct stat {
    dev_t    st_dev;     /* ID of device containing */
                         /* a directory entry for this file */
    ino_t    st_ino;     /* Inode number */
    mode_t   st_mode;    /* File mode (see mknod(2)) */
    nlink_t  st_nlink;   /* Number of links */
    uid_t    st_uid;     /* User ID of the file's owner */
    gid_t    st_gid;     /* Group ID of the file's group */
    dev_t    st_rdev;    /* ID of device */
                         /* This entry is defined only for */
                         /* char special or block special files */
    off_t    st_size;    /* File size in bytes */
    time_t   st_atime;   /* Time of last access */
    time_t   st_mtime;   /* Time of last data modification */
    time_t   st_ctime;   /* Time of last file status change */
                         /* Times measured in seconds since */
                         /* 00:00:00 UTC, Jan. 1, 1970 */
    long     st_blksize; /* Preferred I/O block size */
    blkcnt_t st_blocks;  /* Number of 512 byte blocks allocated */
    char     st_fstype[_ST_FSTYPSZ];
                         /* Null-terminated type of filesystem */
};
The pair st_ino and st_dev
uniquely identifies regular files.
Mac OS X (from the stat(2) man page, lightly edited)
/* struct stat as listed in the Mac OS X stat(2) man page (lightly edited). */
struct stat {
    dev_t    st_dev;     /* device inode resides on */
    ino_t    st_ino;     /* inode's number */
    mode_t   st_mode;    /* inode protection mode */
    nlink_t  st_nlink;   /* number of hard links to the file */
    uid_t    st_uid;     /* user-id of owner */
    gid_t    st_gid;     /* group-id of owner */
    dev_t    st_rdev;    /* device type, for special file inode */
    struct timespec st_atimespec; /* time of last access */
    struct timespec st_mtimespec; /* time of last data modification */
    struct timespec st_ctimespec; /* time of last file status change */
    off_t    st_size;    /* file size, in bytes */
    quad_t   st_blocks;  /* blocks allocated for file */
    u_long   st_blksize; /* optimal file sys I/O ops blocksize */
    u_long   st_flags;   /* user defined flags for file */
    u_long   st_gen;     /* file generation number */
};
Mac OS X (the POSIX-compliant version, from <sys/stat.h>,
lightly edited)
XSI = the Posix option for the
X/Open System Interface Extension
_POSIX_C_SOURCE = compile for Posix conformance
_DARWIN_C_SOURCE = compile for Darwin (Mac OS X
kernel) conformance
/*
 * struct stat as listed in Mac OS X <sys/stat.h> (lightly edited).
 * Members tagged [XSI] are the ones the header attributes to the
 * X/Open System Interface option.
 */
struct stat {
    dev_t      st_dev;    /* [XSI] ID of device containing file */
    ino_t      st_ino;    /* [XSI] File serial number */
    mode_t     st_mode;   /* [XSI] Mode of file (see below) */
    nlink_t    st_nlink;  /* [XSI] Number of hard links */
    uid_t      st_uid;    /* [XSI] User ID of the file */
    gid_t      st_gid;    /* [XSI] Group ID of the file */
    dev_t      st_rdev;   /* [XSI] Device ID */
/*
 * The three timestamps are struct timespec members unless _POSIX_C_SOURCE
 * is defined without _DARWIN_C_SOURCE; in that case each one is a time_t
 * plus a separate nanosecond field.
 */
#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
    struct timespec st_atimespec; /* time of last access */
    struct timespec st_mtimespec; /* time of last data modification */
    struct timespec st_ctimespec; /* time of last status change */
#else
    time_t     st_atime;     /* [XSI] Time of last access */
    long       st_atimensec; /* nsec of last access */
    time_t     st_mtime;     /* [XSI] Last data modification time */
    long       st_mtimensec; /* last data modification nsec */
    time_t     st_ctime;     /* [XSI] Time of last status change */
    long       st_ctimensec; /* nsec of last status change */
#endif
    off_t      st_size;    /* [XSI] file size, in bytes */
    blkcnt_t   st_blocks;  /* [XSI] blocks allocated for file */
    blksize_t  st_blksize; /* [XSI] optimal blocksize for I/O */
    __uint32_t st_flags;   /* user defined flags for file */
    __uint32_t st_gen;     /* file generation number */
    __int32_t  st_lspare;  /* RESERVED: DO NOT USE! */
    __int64_t  st_qspare[2]; /* RESERVED: DO NOT USE! */
};
Atomic Operation
- a single operation composed of multiple steps
- either all steps are performed, or none are performed
Be careful - there are similar terms with slightly different
meanings in different contexts.
- Atomic instruction (in processor design)
- A single machine instruction which may involve several
memory actions, executed as a single uninterruptible
unit. No other instruction using the memory can be
interleaved with an atomic instruction.
- Intel x86 (IA-32, Intel 64) instruction set architecture,
lock
prefix, not described in CS:APP
- Atomic operation (in processor design)
- One or more machine instructions executed as a single
group. This may involve executing the instructions
without interruption, or by allowing interruption but ensuring
that its effect is harmless.
- sometimes called an indivisible operation
- MIPS instruction set architecture,
ll/sc
instructions, see Patterson & Hennessy, Sec. 2.11, covered
in CMPEN 431
- Atomic transaction (in database design)
- A single logical unit of work that either is performed in
its entirety or it is not performed at all. This may
involve starting the work and then reversing the work if it is
interrupted or otherwise cannot be completed.
- It's possible to build memory units using only atomic
transactions, and these would be useful in multiprocessor
systems.
In C11, see <stdatomic.h>, described in Sec. 7.17 of
the C Standard. There are similar features in C++11.
- support for multiple threads of execution including an
improved memory sequencing model, atomic objects, and
thread-local storage (<stdatomic.h> and <threads.h>)
I/O Redirection with dup() and dup2()
#include <unistd.h>
int dup(int fd);
int dup2(int oldfd, int newfd);
// Returns nonnegative file descriptor if ok, -1 if not
dup2(fd1, fd2) is equivalent to
- close(fd2), if fd2 is currently open
- file descriptor fd2 is duplicated from file descriptor fd1
- return fd2
- except that it's an atomic operation
To implement output redirection in a command shell, in the child
process created to run the command,
int fd = open(output_file,
O_WRONLY | O_CREAT | O_TRUNC, mode);
dup2(fd, STDOUT_FILENO);
Interprocess Pipes with pipe()
- Pipes (traditionally) are half-duplex -- data flows in only
one direction.
- Pipes can only be used between processes with a common
ancestor.
- Ancestry is determined by calls to fork(),
yielding parent and child.
- For example, the command
prog1 | prog2
would
cause the shell to create two child processes with a pipe
between them.
#include <unistd.h>
int pipe(int fd[2]);
// returns 0 if ok, -1 if not
- two file descriptors are created
fd[0] is open for reading
fd[1] is open for writing
- the output of
fd[1] is the input for fd[0]
- there is a buffer maintained by the OS that holds not-yet-read
bytes, and the OS will prevent over-filling the buffer
- You can use
fstat() to help determine if a file
descriptor is associated with a regular file or a pipe.
APUE, Fig. 15.5 (lightly edited)
- create a pipe
- open two file descriptors, as above
- fork
- the file descriptors are copied from parent to child, and
everything remains open
- close the unnecessary file descriptors
- one each in parent and child
- communicate parent to child via the pipe, as if ordinary I/O
// create a pipe from parent process to child process
// err_sys() prints a message and exits
/*
 * Send one line from a parent process to its child through a pipe.
 *
 * The pipe is created before fork() so both processes hold both ends;
 * each side then closes the end it does not use.  The parent writes a
 * fixed message into the pipe and the child copies whatever it reads to
 * standard output.  err_sys() prints a message and exits, so every
 * failed call terminates the process that made it.
 */
int main(void)
{
    int fd[2];
    pid_t pid;
    char line[MAXLINE];

    if (pipe(fd) < 0)
        err_sys("pipe error");
    if ((pid = fork()) < 0)
    {
        err_sys("fork error");
    }
    else if (pid > 0) /* parent */
    {
        close(fd[0]); /* close read end */
        /* a short or failed write would otherwise go unnoticed */
        if (write(fd[1], "hello world\n", 12) != 12)
            err_sys("write error to pipe");
    }
    else /* child */
    {
        close(fd[1]); /* close write end */
        ssize_t n = read(fd[0], line, MAXLINE);
        /* read() returns -1 on error; never pass that on as a length */
        if (n < 0)
            err_sys("read error from pipe");
        if (write(STDOUT_FILENO, line, n) != n)
            err_sys("write error to stdout");
    }
    exit(0);
}
- Exercise. Why 12, and not 13?
For a more complex example, APUE Fig. 15.6 (lightly edited)
- parent generates output, child runs the pager program
- essentially, cat
filename | more
#include <sys/wait.h>
#define DEF_PAGER "/bin/more" /* default pager program */
// err_quit() prints a message and exits
// not as severe an error as with err_sys()
int main(int argc, char *argv[])
{
int fd[2];
pid_t pid;
char *pager, *argv0;
char line[MAXLINE];
FILE *fp;
if (argc != 2)
err_quit("usage: a.out
<pathname>");
if ((fp = fopen(argv[1], "r")) == NULL)
err_sys("can't open %s", argv[1]);
if (pipe(fd) < 0)
err_sys("pipe error");
if ((pid = fork()) < 0)
{
err_sys("fork error");
}
else if (pid > 0)
{
/* parent */
close(fd[0]); /*
close read end */
/* parent copies
argv[1] to pipe */
while (fgets(line,
MAXLINE, fp) != NULL)
{
int n = strlen(line);
if (write(fd[1], line, n) != n)
err_sys("write error to pipe");
}
if (ferror(fp))
err_sys("fgets error");
close(fd[1]); /*
close write end of pipe for reader */
if (waitpid(pid, NULL,
0) < 0)
err_sys("waitpid error");
exit(0);
}
else
{
/* child */
close(fd[1]); /*
close write end */
if (fd[0] !=
STDIN_FILENO)
{
if (dup2(fd[0], STDIN_FILENO) != STDIN_FILENO)
err_sys("dup2 error to stdin");
close(fd[0]);
/* don't need this after dup2 */
}
/* get arguments for
execl() */
if ((pager =
getenv("PAGER")) == NULL)
pager =
DEF_PAGER;
if ((argv0 =
strrchr(pager, '/')) != NULL)
argv0++; /* step past rightmost slash */
else
argv0 =
pager; /* no slash in pager */
if (execl(pager,
argv0, (char *)0) < 0)
err_sys("execl error for %s", pager);
}
exit(0);
}
Pipes with popen() and pclose()
#include <stdio.h>
FILE * popen(const char *command, const
char *mode);
int pclose(FILE *stream);
This is in the Posix Standard; see the man pages.
Roughly, a new process is started by fork(), and its
program is replaced using
execl("/usr/bin/sh", "sh",
"-c", command, (char *)0);
Depending on the choice of mode, popen()
returns a writable or readable stream to the parent, and the child
is connected to the parent via stdin or stdout.
pclose() waits for the child process to terminate.
Last revised, 18 Feb. 2013