Index: linux-2.6.22-rc4/drivers/scsi/Kconfig
===================================================================
--- linux-2.6.22-rc4.orig/drivers/scsi/Kconfig	2007-06-11 20:23:32.000000000 +0200
+++ linux-2.6.22-rc4/drivers/scsi/Kconfig	2007-06-11 20:24:20.000000000 +0200
@@ -75,6 +75,14 @@
 	  In this case, do not compile the driver for your SCSI host adapter
 	  (below) as a module either.
 
+config SD_IOSTATS
+	bool "Enable SCSI disk I/O stats"
+	depends on BLK_DEV_SD
+	default y
+	---help---
+	  This enables SCSI disk I/O stats collection.  You must also enable
+	  /proc file system support if you want this feature.
+
 config CHR_DEV_ST
 	tristate "SCSI tape support"
 	depends on SCSI
Index: linux-2.6.22-rc4/drivers/scsi/sd.c
===================================================================
--- linux-2.6.22-rc4.orig/drivers/scsi/sd.c	2007-06-11 20:23:32.000000000 +0200
+++ linux-2.6.22-rc4/drivers/scsi/sd.c	2007-06-11 20:33:35.000000000 +0200
@@ -244,6 +244,41 @@
 	.issue_flush		= sd_issue_flush,
 };
 
+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
+# include <linux/proc_fs.h>
+# include <linux/seq_file.h>
+
+/* One histogram bucket: sectors transferred and number of requests. */
+typedef struct {
+	unsigned long long iostat_size;
+	unsigned long long iostat_count;
+} iostat_counter_t;
+
+/* Bucket i holds requests of 2^i .. 2^(i+1)-1 sectors; last takes the rest. */
+#define IOSTAT_NCOUNTERS 16
+/* Per-disk read/write histograms and the time they were last reset. */
+typedef struct {
+	iostat_counter_t	iostat_read_histogram[IOSTAT_NCOUNTERS];
+	iostat_counter_t	iostat_write_histogram[IOSTAT_NCOUNTERS];
+	struct timeval		iostat_timeval;
+} iostat_stats_t;
+
+iostat_stats_t **sd_iostats;	/* indexed by scsi_disk(disk)->index */
+spinlock_t sd_iostats_lock;
+struct proc_dir_entry *sd_iostats_procdir;
+char sd_iostats_procdir_name[] = "sd_iostats";
+
+extern void sd_iostats_init(void);
+extern void sd_iostats_init_disk(struct gendisk *);
+extern void sd_iostats_fini(void);
+extern void sd_iostats_bump(int disk, unsigned int nsect, int iswrite);
+#else
+static inline void sd_iostats_init(void) {}
+static inline void sd_iostats_init_disk(struct gendisk *disk) {}
+static inline void sd_iostats_fini(void) {}
+static inline void sd_iostats_bump(int disk, unsigned int nsect, int iswrite) {}
+#endif
+
 /*
  * Device no to disk mapping:
  *
@@ -347,6 +382,9 @@
 					(unsigned long long)block,
 					this_count));
 
+	sd_iostats_bump(scsi_disk(disk)->index, this_count,
+			rq_data_dir(SCpnt->request) == WRITE);
+
 	if (!sdp || !scsi_device_online(sdp) ||
 	    block + rq->nr_sectors > get_capacity(disk)) {
 		SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
@@ -575,6 +613,8 @@
 			scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT);
 	}
 
+	sd_iostats_init_disk(disk);
+
 	return 0;
 
 error_out:
@@ -1002,6 +1042,7 @@
 	return 1;
 }
 
+#define SD_STATS 256	/* number of slots in the sd_iostats table */
 /*
  * spinup disk - called only in sd_revalidate_disk()
  */
@@ -1684,6 +1725,346 @@
 	return error;
 }
 
+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
+/*
+ * Print one disk's request-size histogram.  seq->private is the gendisk
+ * that sd_iostats_seq_open() copied from the proc entry.
+ */
+static int
+sd_iostats_seq_show(struct seq_file *seq, void *v)
+{
+	struct timeval now;
+	struct gendisk *disk = seq->private;
+	iostat_stats_t *stats;
+	unsigned long long read_len;
+	unsigned long long read_len_tot;
+	unsigned long read_num;
+	unsigned long read_num_tot;
+	unsigned long long write_len;
+	unsigned long long write_len_tot;
+	unsigned long write_num;
+	unsigned long write_num_tot;
+	int i;
+	int maxi;
+
+	if (sd_iostats == NULL) {
+		printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n");
+		BUG();
+	}
+
+	stats = sd_iostats[scsi_disk(disk)->index];
+	if (stats == NULL) {
+		printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n");
+		BUG();
+	}
+
+	/* time elapsed since the stats were created or last reset */
+	do_gettimeofday(&now);
+	now.tv_sec -= stats->iostat_timeval.tv_sec;
+	now.tv_usec -= stats->iostat_timeval.tv_usec;
+	if (now.tv_usec < 0) {
+		now.tv_usec += 1000000;
+		now.tv_sec--;
+	}
+
+	/* this sampling races with updates */
+	seq_printf(seq, "index: %u snapshot_time: %lu.%06lu\n",
+		   scsi_disk(disk)->index, now.tv_sec, now.tv_usec);
+
+	/* find the highest bucket in use so trailing empty rows are skipped */
+	for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)
+		if (stats->iostat_read_histogram[i].iostat_count != 0 ||
+		    stats->iostat_write_histogram[i].iostat_count != 0)
+			break;
+	maxi = i;
+
+	seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size",
+		   "reads", "total", "writes", "total");
+
+	read_len_tot = write_len_tot = 0;
+	read_num_tot = write_num_tot = 0;
+	for (i = 0; i <= maxi; i++) {
+		read_len = stats->iostat_read_histogram[i].iostat_size;
+		read_len_tot += read_len;
+		read_num = stats->iostat_read_histogram[i].iostat_count;
+		read_num_tot += read_num;
+
+		write_len = stats->iostat_write_histogram[i].iostat_size;
+		write_len_tot += write_len;
+		write_num = stats->iostat_write_histogram[i].iostat_count;
+		write_num_tot += write_num;
+
+		seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n",
+			    512<<i, read_num, read_len, write_num, write_len);
+	}
+
+	seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n", "total",
+		   read_num_tot, read_len_tot,
+		   write_num_tot, write_len_tot);
+	return 0;
+}
+
+/* The file is a single record: only position 0 yields an element. */
+static void *
+sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
+{
+	return (*pos == 0) ? (void *)1 : NULL;
+}
+
+static void *
+sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	++*pos;
+	return NULL;
+}
+
+static void
+sd_iostats_seq_stop(struct seq_file *p, void *v)
+{
+}
+
+static struct seq_operations sd_iostats_seqops = {
+	.start = sd_iostats_seq_start,
+	.stop  = sd_iostats_seq_stop,
+	.next  = sd_iostats_seq_next,
+	.show  = sd_iostats_seq_show,
+};
+
+/* Open the proc file and hand the entry's gendisk to the seq_file. */
+static int
+sd_iostats_seq_open (struct inode *inode, struct file *file)
+{
+	int rc;
+
+	rc = seq_open(file, &sd_iostats_seqops);
+	if (rc != 0)
+		return rc;
+
+	((struct seq_file *)file->private_data)->private = PDE(inode)->data;
+	return 0;
+}
+
+/*
+ * Any write to the proc file resets the disk's histograms and restarts
+ * the snapshot clock; the data written is ignored.
+ */
+static ssize_t
+sd_iostats_seq_write(struct file *file, const char __user *buffer,
+		     size_t len, loff_t *off)
+{
+	struct seq_file *seq = file->private_data;
+	struct gendisk *disk = seq->private;
+	iostat_stats_t *stats = sd_iostats[scsi_disk(disk)->index];
+	unsigned long flags;
+
+	spin_lock_irqsave (&sd_iostats_lock, flags);
+	memset (stats, 0, sizeof(*stats));
+	do_gettimeofday(&stats->iostat_timeval);
+	spin_unlock_irqrestore (&sd_iostats_lock, flags);
+
+	return len;
+}
+
+static struct file_operations sd_iostats_proc_fops = {
+	.owner   = THIS_MODULE,
+	.open    = sd_iostats_seq_open,
+	.read    = seq_read,
+	.write   = sd_iostats_seq_write,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+extern struct proc_dir_entry *proc_scsi;
+
+/*
+ * Allocate the per-disk stats table and create /proc/scsi/sd_iostats.
+ * Failures only log a warning; the driver then runs without iostats.
+ */
+void
+sd_iostats_init(void)
+{
+	spin_lock_init(&sd_iostats_lock);
+
+	/* kcalloc() zeroes the table, so every slot starts out NULL */
+	sd_iostats = kcalloc(SD_STATS, sizeof(*sd_iostats), GFP_KERNEL);
+	if (sd_iostats == NULL) {
+		printk(KERN_WARNING "Can't keep sd iostats: "
+		       "ENOMEM allocating stats array size %zu\n",
+		       SD_STATS * sizeof(*sd_iostats));
+		return;
+	}
+
+	if (proc_scsi == NULL) {
+		printk(KERN_WARNING "No access to sd iostats: "
+		       "proc_scsi is NULL\n");
+		return;
+	}
+
+	sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
+					       S_IFDIR | S_IRUGO | S_IXUGO,
+					       proc_scsi);
+	if (sd_iostats_procdir == NULL) {
+		printk(KERN_WARNING "No access to sd iostats: "
+		       "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
+		return;
+	}
+}
+
+/*
+ * Set up stats and a proc file for @disk the first time it is seen.
+ * Does nothing if iostats are unavailable or the disk already has stats.
+ */
+void
+sd_iostats_init_disk(struct gendisk *disk)
+{
+	struct proc_dir_entry *pde;
+	unsigned long flags;
+	iostat_stats_t *stats;
+
+	if (sd_iostats == NULL ||
+	    sd_iostats_procdir == NULL)
+		return;
+
+	/* valid slots are 0 .. SD_STATS - 1 */
+	if (scsi_disk(disk)->index >= SD_STATS) {
+		printk(KERN_ERR "sd_iostats_init_disk: "
+		       "unexpected disk index %d(%d)\n",
+		       scsi_disk(disk)->index, SD_STATS);
+		return;
+	}
+
+	if (sd_iostats[scsi_disk(disk)->index] != NULL)
+		return;
+
+	stats = kzalloc(sizeof(*stats), GFP_KERNEL);
+	if (stats == NULL) {
+		printk(KERN_WARNING "Can't keep %s iostats: "
+		       "ENOMEM allocating stats size %zu\n",
+		       disk->disk_name, sizeof(*stats));
+		return;
+	}
+
+	do_gettimeofday(&stats->iostat_timeval);
+
+	spin_lock_irqsave(&sd_iostats_lock, flags);
+
+	/* re-check under the lock: another opener may have won the race */
+	if (sd_iostats[scsi_disk(disk)->index] != NULL) {
+		spin_unlock_irqrestore(&sd_iostats_lock, flags);
+		kfree (stats);
+		return;
+	}
+
+	sd_iostats[scsi_disk(disk)->index] = stats;
+
+	spin_unlock_irqrestore(&sd_iostats_lock, flags);
+
+	pde = create_proc_entry(disk->disk_name, S_IRUGO | S_IWUSR,
+				sd_iostats_procdir);
+	if (pde == NULL) {
+		printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n",
+		       sd_iostats_procdir_name, disk->disk_name);
+	} else {
+		pde->proc_fops = &sd_iostats_proc_fops;
+		pde->data = disk;
+	}
+}
+
+/* Build the "sdX" / "sdXY" name for disk number @disknum in @buffer. */
+static void sd_devname(unsigned int disknum, char *buffer)
+{
+	if (disknum < 26)
+		sprintf(buffer, "sd%c", 'a' + disknum);
+	else {
+		unsigned int min1;
+		unsigned int min2;
+		/*
+		 * For larger numbers of disks, we need to go to a new
+		 * naming scheme.
+		 */
+		min1 = disknum / 26;
+		min2 = disknum % 26;
+		sprintf(buffer, "sd%c%c", 'a' + min1 - 1, 'a' + min2);
+	}
+}
+
+/* Remove every proc entry and free all stats; safe after a failed init. */
+void
+sd_iostats_fini(void)
+{
+	char name[6];
+	int i;
+
+	if (sd_iostats_procdir != NULL) {
+		for (i = 0; i < SD_STATS; i++) {
+			sd_devname(i, name);
+			remove_proc_entry(name, sd_iostats_procdir);
+		}
+
+		if (proc_scsi == NULL) {
+			printk(KERN_ERR "sd_iostats_fini: proc_scsi NULL\n");
+			BUG();
+		}
+		remove_proc_entry(sd_iostats_procdir_name,
+				  proc_scsi);
+
+		sd_iostats_procdir = NULL;
+	}
+
+	if (sd_iostats != NULL) {
+		/* kfree(NULL) is a no-op, so unused slots need no check */
+		for (i = 0; i < SD_STATS; i++)
+			kfree(sd_iostats[i]);
+
+		kfree(sd_iostats);
+		sd_iostats = NULL;
+	}
+}
+
+/*
+ * Account one request of @nsect sectors against disk index @disk.
+ * Called from the I/O path, so bad input is dropped, never BUG()ed on.
+ */
+void
+sd_iostats_bump(int disk, unsigned int nsect, int iswrite)
+{
+	iostat_stats_t *stats;
+	iostat_counter_t *counter;
+	unsigned int tmp;
+	int bucket;
+	unsigned long irqflags;
+
+	if (sd_iostats == NULL)
+		return;
+
+	/* sd_iostats_init_disk() keeps no stats for disks past the table */
+	if (disk < 0 || disk >= SD_STATS)
+		return;
+
+	/* bucket = floor(log2(nsect)); 0 and 1 sectors both use bucket 0 */
+	for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
+		tmp /= 2;
+
+	/* requests too large for the histogram go in the last bucket */
+	if (bucket >= IOSTAT_NCOUNTERS)
+		bucket = IOSTAT_NCOUNTERS - 1;
+
+	spin_lock_irqsave(&sd_iostats_lock, irqflags);
+
+	stats = sd_iostats[disk];
+	if (stats != NULL) {
+		counter = iswrite ?
+			&stats->iostat_write_histogram[bucket] :
+			&stats->iostat_read_histogram[bucket];
+
+		counter->iostat_size += nsect;
+		counter->iostat_count++;
+	}
+
+	spin_unlock_irqrestore(&sd_iostats_lock, irqflags);
+}
+#endif
+
 /**
  *	sd_remove - called whenever a scsi disk (previously recognized by
  *	sd_probe) is detached from the system. It is called (potentially
@@ -1855,6 +2236,7 @@
 	if (err)
 		goto err_out_class;
 
+	sd_iostats_init();
 	return 0;
 
 err_out_class:
@@ -1876,6 +2258,7 @@
 
 	SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n"));
 
+	sd_iostats_fini();
 	scsi_unregister_driver(&sd_template.gendrv);
 	class_unregister(&sd_disk_class);
 
Index: linux-2.6.22-rc4/drivers/scsi/scsi_proc.c
===================================================================
--- linux-2.6.22-rc4.orig/drivers/scsi/scsi_proc.c	2007-06-11 20:23:32.000000000 +0200
+++ linux-2.6.22-rc4/drivers/scsi/scsi_proc.c	2007-06-11 20:24:20.000000000 +0200
@@ -40,7 +40,8 @@
 /* 4K page size, but our output routines, use some slack for overruns */
 #define PROC_BLOCK_SIZE (3*1024)
 
-static struct proc_dir_entry *proc_scsi;
+struct proc_dir_entry *proc_scsi;
+EXPORT_SYMBOL(proc_scsi);
 
 /* Protect sht->present and sht->proc_dir */
 static DEFINE_MUTEX(global_host_template_mutex);