// kern/log.c
voidinitlog(intdev){if(sizeof(structlogheader)>=BSIZE)panic("\tinitlog: logheader is too big.\n");structsuperblocksb;initlock(&log.lock,"log");readsb(dev,&sb);log.start=sb.logstart;log.size=sb.nlog;log.dev=dev;recover_from_log();cprintf("initlog: success.\n");}
// inc/fs.h
/*
 * Disk layout:
 * [boot block | super block | log | inode blocks | free bit map | data blocks]
 *
 * The super block describes that layout. mkfs computes it when it
 * builds the initial file system.
 */
struct superblock {
    uint32_t size;       /* Size of file system image (blocks) */
    uint32_t nblocks;    /* Number of data blocks */
    uint32_t ninodes;    /* Number of inodes */
    uint32_t nlog;       /* Number of log blocks */
    uint32_t logstart;   /* Block number of first log block */
    uint32_t inodestart; /* Block number of first inode block */
    uint32_t bmapstart;  /* Block number of first free map block */
};
// kern/log.c
/*
* Contents of the header block, used for both the on-disk header block
* and to keep track in memory of logged block # before commit.
*/structlogheader{intn;intblock[LOGSIZE];};structlog{structspinlocklock;intstart;intsize;intoutstanding;// How many FS sys calls are executing.
intcommitting;// In commit(), please wait.
intdev;structlogheaderlh;}log;
// kern/log.c
/*
 * Replay whatever transaction the on-disk header describes, then
 * write back an empty header.
 */
static void recover_from_log()
{
    read_head();

    /* If a transaction was committed, copy it from the log to disk. */
    install_trans();

    /* Clear the log. */
    log.lh.n = 0;
    write_head();
}
// kern/log.c
/*
* Read the log header from disk into the in-memory log header.
*/staticvoidread_head(){structbuf*buf=bread(log.dev,log.start);structlogheader*lh=(structlogheader*)(buf->data);log.lh.n=lh->n;for(inti=0;i<log.lh.n;++i)log.lh.block[i]=lh->block[i];brelse(buf);}
// kern/log.c
/*
* Copy committed blocks from log to their home location.
*/staticvoidinstall_trans(){for(inti=0;i<log.lh.n;++i){structbuf*log_buf=bread(log.dev,log.start+i+1);structbuf*dst_buf=bread(log.dev,log.lh.block[i]);memmove(dst_buf->data,log_buf->data,BSIZE);brelse(log_buf);bwrite(dst_buf);brelse(dst_buf);}}
// kern/log.c
/*
* Write in-memory log header to disk.
* This is the true point at which the
* current transaction commits.
*/staticvoidwrite_head(){structbuf*buf=bread(log.dev,log.start);structlogheader*lh=(structlogheader*)(buf->data);lh->n=log.lh.n;for(inti=0;i<log.lh.n;++i)lh->block[i]=log.lh.block[i];bwrite(buf);brelse(buf);}
// kern/log.c
/*
* Caller has modified b->data and is done with the buffer.
* Record the block number and pin in the cache with B_DIRTY.
* commit() / write_log() will do the disk write.
*
* log_write() replaces bwrite(); a typical use is:
* bp = bread(...)
* modify bp->data[]
* log_write(bp)
* brelse(bp)
*/voidlog_write(structbuf*b){if(log.lh.n>=LOGSIZE||log.lh.n>=log.size-1)panic("\tlog_write: transaction is too big.\n");if(log.outstanding<1)panic("\tlog_write: outside of transaction.\n");acquire(&log.lock);inti=0;for(;i<log.lh.n;++i){if(log.lh.block[i]==b->blockno)break;// log absorption
}if(i==log.lh.n){log.lh.block[i]=b->blockno;++log.lh.n;}b->flags|=B_DIRTY;// prevent eviction
release(&log.lock);}
// kern/log.c
/*
 * Called at the start of each FS system call.
 *
 * Sleeps while a commit is in progress, or while admitting one more
 * operation (each reserving MAXOPBLOCKS) might exhaust log space;
 * then counts the caller as an outstanding operation.
 */
void begin_op()
{
    acquire(&log.lock);

    while (log.committing ||
           log.lh.n + (log.outstanding + 1) * MAXOPBLOCKS > LOGSIZE) {
        /* Wait for the commit to finish or for log space to free up. */
        sleep(&log, &log.lock);
    }

    ++log.outstanding;
    release(&log.lock);
}
// kern/log.c
/*
 * Called at the end of each FS system call.
 * The caller that brings the outstanding count to zero performs the
 * commit.
 */
void end_op()
{
    acquire(&log.lock);

    --log.outstanding;
    if (log.committing) {
        panic("\tend_op: log is committing.\n");
    }

    const int is_last = (log.outstanding == 0);
    if (is_last) {
        log.committing = 1;
    } else {
        /*
         * begin_op() may be waiting for log space, and decrementing
         * log.outstanding has decreased the amount of reserved space.
         */
        wakeup(&log);
    }
    release(&log.lock);

    if (!is_last) {
        return;
    }

    /* commit() is called with log.lock released. */
    commit();

    acquire(&log.lock);
    log.committing = 0;
    wakeup(&log);
    release(&log.lock);
}
// kern/log.c
/*
 * Commit the current transaction, if it logged any blocks.
 */
static void commit()
{
    if (log.lh.n <= 0) {
        return; /* nothing was logged */
    }

    /* NOTE(review): write_log() is defined outside this view — presumably
     * it copies the modified blocks into the log area; confirm. */
    write_log();
    write_head();    /* header on disk now lists the logged blocks */
    install_trans(); /* copy logged blocks to their home locations */

    log.lh.n = 0;
    write_head();    /* erase the transaction from the log */
}
// kern/log.c
/*
* Write in-memory log header to disk.
* This is the true point at which the
* current transaction commits.
*/staticvoidwrite_head(){structbuf*buf=bread(log.dev,log.start);structlogheader*lh=(structlogheader*)(buf->data);lh->n=log.lh.n;for(inti=0;i<log.lh.n;++i)lh->block[i]=log.lh.block[i];bwrite(buf);brelse(buf);}
// inc/file.h
/*
* In-memory copy of an inode.
*/structinode{uint32_tdev;// Device number
uint32_tinum;// Inode number
intref;// Reference count
structsleeplocklock;// Protects everything below here
intvalid;// Inode has been read from disk?
uint16_ttype;// Copy of disk inode
uint16_tmajor;uint16_tminor;uint16_tnlink;uint32_tsize;uint32_taddrs[NDIRECT+1];};
// kern/fs.c
/*
* Allocate an inode on device dev.
*
* Mark it as allocated by giving it type type.
* Returns an unlocked but allocated and referenced inode.
*/structinode*ialloc(uint32_tdev,uint16_ttype){for(intinum=1;inum<sb.ninodes;++inum){structbuf*bp=bread(dev,IBLOCK(inum,sb));structdinode*dip=(structdinode*)bp->data+inum%IPB;if(!dip->type){// a free inode
memset(dip,0,sizeof(*dip));dip->type=type;log_write(bp);// mark it allocated on the disk
brelse(bp);returniget(dev,inum);}brelse(bp);}panic("\tialloc: no inodes.\n");return0;}
// kern/fs.c
/*
* Find the inode with number inum on device dev
* and return the in-memory copy. Does not lock
* the inode and does not read it from disk.
*/staticstructinode*iget(uint32_tdev,uint32_tinum){acquire(&icache.lock);// Is the inode already cached?
structinode*empty=NULL;for(inti=0;i<NINODE;++i){structinode*ip=&icache.inode[i];if(ip->ref>0&&ip->dev==dev&&ip->inum==inum){ip->ref++;release(&icache.lock);returnip;}if(!empty&&!ip->ref)empty=ip;// remember empty slot
}// Recycle an inode cache entry.
if(!empty)panic("\tiget: no inodes.\n");structinode*ip=empty;ip->dev=dev;ip->inum=inum;ip->ref=1;ip->valid=0;release(&icache.lock);returnip;}
// kern/fs.c
/*
* Copy a modified in-memory inode to disk.
*
* Must be called after every change to an ip->xxx field
* that lives on disk, since i-node cache is write-through.
* Caller must hold ip->lock.
*/voidiupdate(structinode*ip){structbuf*bp=bread(ip->dev,IBLOCK(ip->inum,sb));structdinode*dip=(structdinode*)bp->data+ip->inum%IPB;dip->type=ip->type;dip->major=ip->major;dip->minor=ip->minor;dip->nlink=ip->nlink;dip->size=ip->size;memmove(dip->addrs,ip->addrs,sizeof(ip->addrs));log_write(bp);brelse(bp);}
// kern/fs.c
/*
* Increment reference count for ip.
* Returns ip to enable ip = idup(ip1) idiom.
*/structinode*idup(structinode*ip){acquire(&icache.lock);ip->ref++;release(&icache.lock);returnip;}
// kern/fs.c
/*
* Lock the given inode.
* Reads the inode from disk if necessary.
*/voidilock(structinode*ip){if(!ip||ip->ref<1)panic("\tilock: invalid inode.\n");acquiresleep(&ip->lock);if(!ip->valid){structbuf*bp=bread(ip->dev,IBLOCK(ip->inum,sb));structdinode*dip=(structdinode*)bp->data+ip->inum%IPB;ip->type=dip->type;if(!ip->type){brelse(bp);panic("\tilock: no type.\n");}ip->major=dip->major;ip->minor=dip->minor;ip->nlink=dip->nlink;ip->size=dip->size;memmove(ip->addrs,dip->addrs,sizeof(ip->addrs));ip->valid=1;brelse(bp);}}
// kern/fs.c
/*
* Drop a reference to an in-memory inode.
*
* If that was the last reference, the inode cache entry can
* be recycled.
* If that was the last reference and the inode has no links
* to it, free the inode (and its content) on disk.
* All calls to iput() must be inside a transaction in
* case it has to free the inode.
*/voidiput(structinode*ip){acquire(&icache.lock);if(ip->ref==1&&ip->valid&&!ip->nlink){// ip->ref == 1 means no other process can have ip locked,
// so this acquiresleep() won't block (or deadlock).
acquiresleep(&ip->lock);release(&icache.lock);// inode has no links and no other references: truncate and free.
itrunc(ip);ip->type=0;iupdate(ip);ip->valid=0;releasesleep(&ip->lock);acquire(&icache.lock);}ip->ref--;release(&icache.lock);}
// kern/fs.c
/*
* Truncate inode (discard contents).
*
* Only called when the inode has no links
* to it (no directory entries referring to it)
* and has no in-memory reference to it (is
* not an open file or current directory).
*/staticvoiditrunc(structinode*ip){for(inti=0;i<NDIRECT;++i){if(ip->addrs[i]){bfree(ip->dev,ip->addrs[i]);ip->addrs[i]=0;}}if(ip->addrs[NDIRECT]){structbuf*bp=bread(ip->dev,ip->addrs[NDIRECT]);uint32_t*a=(uint32_t*)bp->data;for(intj=0;j<NINDIRECT;++j){if(a[j])bfree(ip->dev,a[j]);}brelse(bp);bfree(ip->dev,ip->addrs[NDIRECT]);ip->addrs[NDIRECT]=0;}ip->size=0;iupdate(ip);}
// kern/fs.c
/*
* Free a disk block.
*/staticvoidbfree(intdev,uint32_tb){structbuf*bp=bread(dev,BBLOCK(b,sb));intbi=b%BPB;intm=1<<(bi%8);if(!(bp->data[bi/8]&m))panic("\tbfree: freeing a free block.\n");bp->data[bi/8]&=~m;log_write(bp);brelse(bp);}
// kern/fs.c
/*
* Copy stat information from inode.
* Caller must hold ip->lock.
*/voidstati(structinode*ip,structstat*st){// FIXME: Support other fields in stat.
st->st_dev=ip->dev;st->st_ino=ip->inum;st->st_nlink=ip->nlink;st->st_size=ip->size;switch(ip->type){caseT_FILE:st->st_mode=S_IFREG;break;caseT_DIR:st->st_mode=S_IFDIR;break;caseT_DEV:st->st_mode=0;break;default:panic("\tstati: unexpected stat type %d.\n",ip->type);}}
// kern/fs.c
/*
* Read data from inode.
* Caller must hold ip->lock.
*/ssize_treadi(structinode*ip,char*dst,size_toff,size_tn){if(ip->type==T_DEV){if(ip->major<0||ip->major>=NDEV||!devsw[ip->major].read)return-1;returndevsw[ip->major].read(ip,dst,n);}if(off>ip->size||off+n<off)return-1;if(off+n>ip->size)n=ip->size-off;for(size_ttot=0,m=0;tot<n;tot+=m,off+=m,dst+=m){structbuf*bp=bread(ip->dev,bmap(ip,off/BSIZE));m=min(n-tot,BSIZE-off%BSIZE);memmove(dst,bp->data+off%BSIZE,m);brelse(bp);}returnn;}
// kern/fs.c
/*
* Inode content
*
* The content (data) associated with each inode is stored
* in blocks on the disk. The first NDIRECT block numbers
* are listed in ip->addrs[]. The next NINDIRECT blocks are
* listed in block ip->addrs[NDIRECT].
*
* Return the disk block address of the nth block in inode ip.
* If there is no such block, bmap allocates one.
*/staticuint32_tbmap(structinode*ip,uint32_tbn){if(bn<NDIRECT){// Load direct block, allocating if necessary.
uint32_taddr=ip->addrs[bn];if(!addr)ip->addrs[bn]=addr=balloc(ip->dev);returnaddr;}bn-=NDIRECT;if(bn<NINDIRECT){// Load indirect block, allocating if necessary.
uint32_taddr=ip->addrs[NDIRECT];if(!addr)ip->addrs[NDIRECT]=addr=balloc(ip->dev);structbuf*bp=bread(ip->dev,addr);uint32_t*a=(uint32_t*)bp->data;addr=a[bn];if(!addr){a[bn]=addr=balloc(ip->dev);log_write(bp);}brelse(bp);returnaddr;}panic("\tbmap: out of range.\n");return0;}
// kern/fs.c
/*
* Allocate a zeroed disk block.
*/staticuint32_tballoc(uint32_tdev){for(intb=0;b<sb.size;b+=BPB){structbuf*bp=bread(dev,BBLOCK(b,sb));for(intbi=0;bi<BPB&&b+bi<sb.size;++bi){intm=1<<(bi%8);if(!(bp->data[bi/8]&m)){// Is block free?
bp->data[bi/8]|=m;// Mark block in use.
log_write(bp);brelse(bp);bzero(dev,b+bi);returnb+bi;}}brelse(bp);}panic("\tballoc: out of blocks.\n");return0;}
// kern/fs.c
/*
* Write data to inode.
* Caller must hold ip->lock.
*/ssize_twritei(structinode*ip,char*src,size_toff,size_tn){if(ip->type==T_DEV){if(ip->major<0||ip->major>=NDEV||!devsw[ip->major].write)return-1;returndevsw[ip->major].write(ip,src,n);}if(off>ip->size||off+n<off)return-1;if(off+n>MAXFILE*BSIZE)return-1;for(size_ttot=0,m=0;tot<n;tot+=m,off+=m,src+=m){structbuf*bp=bread(ip->dev,bmap(ip,off/BSIZE));m=min(n-tot,BSIZE-off%BSIZE);memmove(bp->data+off%BSIZE,src,m);log_write(bp);brelse(bp);}if(n>0&&off>ip->size){ip->size=off;iupdate(ip);}returnn;}
// kern/file.c
/*
* Close file f. (Decrement ref count, close when reaches 0.)
*/voidfile_close(structfile*f){acquire(&ftable.lock);if(f->ref<1)panic("\tfile_close: invalid file.\n");if(--f->ref>0){release(&ftable.lock);return;}structfileff=*f;f->ref=0;f->type=FD_NONE;release(&ftable.lock);if(ff.type==FD_INODE){begin_op();iput(ff.ip);end_op();}else{panic("\tfile_close: unsupported type.\n");}}
// kern/file.c
/*
* Get metadata about file f.
*/intfile_stat(structfile*f,structstat*st){if(f->type==FD_INODE){ilock(f->ip);stati(f->ip,st);iunlock(f->ip);return0;}return-1;}
// kern/file.c
/*
* Write to file f.
*/ssize_tfile_write(structfile*f,char*addr,ssize_tn){if(!f->writable)return-1;if(f->type==FD_INODE){// Write a few blocks at a time to avoid exceeding the maximum log
// transaction size, including i-node, indirect block, allocation
// blocks, and 2 blocks of slop for non-aligned writes. This really
// belongs lower down, since writei() might be writing a device like the
// console.
intmax=((MAXOPBLOCKS-4)/2)*512;inti=0;while(i<n){intn1=n-i;if(n1>max)n1=max;begin_op();ilock(f->ip);intr=writei(f->ip,addr+i,f->off,n1);if(r>0)f->off+=r;iunlock(f->ip);end_op();if(r<0)break;if(r!=n1)panic("\tfile_write: partial data written.\n");i+=r;}returni==n?n:-1;}panic("\tfile_write: unsupported type.\n");return0;}
// inc/trap.h
/*
 * Register state passed to trap().
 */
struct trapframe {
    /* Additional registers used to support musl */
    uint64_t _padding;   /* for 16-byte aligned */
    uint64_t tpidr_el0;
    __uint128_t q0;

    /* Special Registers */
    uint64_t sp_el0;     /* Stack Pointer */
    uint64_t spsr_el1;   /* Program Status Register */
    uint64_t elr_el1;    /* Exception Link Register */

    /* General-Purpose Registers */
    uint64_t x0;
    uint64_t x1;
    uint64_t x2;
    uint64_t x3;
    uint64_t x4;
    uint64_t x5;
    uint64_t x6;
    uint64_t x7;
    uint64_t x8;
    uint64_t x9;
    uint64_t x10;
    uint64_t x11;
    uint64_t x12;
    uint64_t x13;
    uint64_t x14;
    uint64_t x15;
    uint64_t x16;
    uint64_t x17;
    uint64_t x18;
    uint64_t x19;
    uint64_t x20;
    uint64_t x21;
    uint64_t x22;
    uint64_t x23;
    uint64_t x24;
    uint64_t x25;
    uint64_t x26;
    uint64_t x27;
    uint64_t x28;
    uint64_t x29;        /* Frame Pointer */
    uint64_t x30;        /* Procedure Link Register */
};
// kern/trap.c
voidtrap(structtrapframe*tf){intec=resr()>>EC_SHIFT,iss=resr()&ISS_MASK;lesr(0);// Clear esr.
switch(ec){caseEC_UNKNOWN:interrupt(tf);break;caseEC_SVC64:if(!iss){/* Jump to syscall to handle the system call from user process */tf->x0=syscall1(tf);}else{cprintf("trap: unexpected svc iss 0x%x\n",iss);}break;default:panic("\ttrap: unexpected irq.\n");}}
2.4 syscall.c
根据之前保存在寄存器 X8 中的值,可以得到当前的系统调用号(system call number);随后以该调用号为下标查找函数指针表 syscalls,即可调用相应的系统调用处理函数。
// kern/syscall.c
staticfuncsyscalls[]={[SYS_set_tid_address]=sys_gettid,[SYS_gettid]=sys_gettid,[SYS_ioctl]=sys_ioctl,[SYS_rt_sigprocmask]=sys_rt_sigprocmask,[SYS_brk]=(func)sys_brk,[SYS_execve]=sys_exec,[SYS_sched_yield]=sys_yield,[SYS_clone]=sys_clone,[SYS_wait4]=sys_wait4,// FIXME: exit_group should kill every thread in the current thread group.
[SYS_exit_group]=sys_exit,[SYS_exit]=sys_exit,[SYS_dup]=sys_dup,[SYS_chdir]=sys_chdir,[SYS_fstat]=sys_fstat,[SYS_newfstatat]=sys_fstatat,[SYS_mkdirat]=sys_mkdirat,[SYS_mknodat]=sys_mknodat,[SYS_openat]=sys_openat,[SYS_writev]=(func)sys_writev,[SYS_read]=(func)sys_read,[SYS_close]=sys_close,};