Generally, on any Solaris system, an increasing number of hard errors on a disk indicates that the disk has failed or is about to fail. If iostat -e reports a non-zero value for errors, these values will not be reset until you reboot the system. This can happen when you replace a failed disk with a new disk. This Perl script supports resetting the sd and ssd kstat error counters, and it works on both Solaris 10 and 11. But beware of the warning given by Oracle support below.
The general command to check io errors on the disks is :
iostat -e
          ---- errors ---
device    s/w h/w trn tot
sd3         0   0   1   1
sd4         0   0   0   0
sd5         0   0   0   0
nfs1        0   0   0   0
nfs5        0   0   0   0
Hard error : Re-read fails several times for CRC check
Transport error : Errors reported by I/O bus
Total errors : Soft error + Hard error + Transport errors
Execute this script as the root user as follows:
Usage :
# ./iostat-E_reset.pl
Usage: iostat-E_reset.pl [sd|ssd] [instance number] [type] [type]...
 type values are hard, illrq, media, nodev, ntrdy, pfa recov, soft, and tran,
 type "all" can be used to reset all of the above
 type "io" can be used to reset soft, hard, and tran errors
Example :
# ./iostat-E_reset.pl sd 3 io
Resetting sd_transerrs for instance 3, current value 0x1
Resetting sd_softerrs for instance 3, current value 0x0
Resetting sd_harderrs for instance 3, current value 0x0
The script
The Perl script modifies the running kernel online (through mdb) to reset the I/O error counters of the disk.
#!/usr/bin/perl
# Oracle Corp Inc.
#
# This script uses mdb to reset the error counters of a LUN as described in the %types
# hash table below.
#
# This script supports Solaris versions 10 and 11.
#
# The following document is the reference for this script.
# (Doc ID 1012731.1) How to Reset the iostat -E Error Counters Without Rebooting
#
# THIS SCRIPT MODIFIES THE RUNNING KERNEL - USE IS AT YOUR OWN RISK.
#
# Date: 1/8/2014
#
# Usage: iostat-E_reset.pl [sd|ssd] [instance number] [type] [type]...
#
# Flow:
#   1. Validate the command line, the OS release and the effective uid.
#   2. Start "mdb -kw" as a co-process and talk to it line by line.
#   3. Walk softstate -> un_errstats -> ks_data to find the kstat data area
#      of the requested driver instance.
#   4. For every requested error type, verify the kstat entry is a 32-bit
#      unsigned counter and write 0 to its address.

use strict;
use warnings;
use integer;
use IPC::Open3;

my $mdb    = "/usr/bin/mdb";
my $os_rev = `/usr/bin/uname -r`;
my $drv;           # driver name given on the command line: "sd" or "ssd"
my $inst;          # driver instance number given on the command line
my $mdb_resp;      # most recent line read back from mdb
my $soft_state;    # softstate pointer of the instance (hex digits, no 0x)
my $errstats;      # un_errstats pointer (hex digits, no 0x)
my $ks_data;       # ks_data pointer (hex digits, no 0x)
my $ks_type;       # data_type value of the counter being reset

# Error types accepted on the command line; a value of 1 means "requested".
my %types = (
    "hard"  => 0,    # Hard Errors
    "illrq" => 0,    # Illegal Request Errors
    "media" => 0,    # Media Errors
    "nodev" => 0,    # No Device Errors
    "ntrdy" => 0,    # Device Not Ready Errors
    "pfa"   => 0,    # Predictive Failure Analysis Errors
    "recov" => 0,    # Recoverable Errors
    "soft"  => 0,    # Soft Errors
    "tran"  => 0,    # Transport Errors
    "all"   => 0,    # Reset all of the above
    "io"    => 0     # Reset hard, soft, and tran errors
);

# Command-line type name -> member name inside struct sd_errstats.
my %trans = (
    "hard"  => "sd_harderrs",
    "illrq" => "sd_rq_illrq_err",
    "media" => "sd_rq_media_err",
    "nodev" => "sd_rq_nodev_err",
    "ntrdy" => "sd_rq_ntrdy_err",
    "pfa"   => "sd_rq_pfa_err",
    "recov" => "sd_rq_recov_err",
    "soft"  => "sd_softerrs",
    "tran"  => "sd_transerrs"
);

chomp($os_rev);

# Print the usage text on STDERR and exit with status 22.
sub usage {
    print STDERR "Usage: iostat-E_reset.pl [sd|ssd] [instance number] [type] [type]...\n";
    print STDERR " type values are hard, illrq, media, nodev, ntrdy, pfa recov, soft, and tran,\n";
    print STDERR " type \"all\" can be used to reset all of the above\n";
    print STDERR " type \"io\" can be used to reset soft, hard, and tran errors\n";
    exit 22;
}

# Send one command line to mdb and return the first line of its reply.
# Dies if mdb has gone away (EOF on the read handle).
sub mdb_query {
    my ($cmd) = @_;
    print MDB_WRT "$cmd\n";
    my $resp = <MDB_RD>;
    die "ERROR: No response from mdb for command: $cmd\n" if !defined $resp;
    return $resp;
}

usage() if @ARGV < 3 or $ARGV[0] !~ /^s?sd$/ or $ARGV[1] !~ /^\d+$/;

$drv  = shift @ARGV;
$inst = shift @ARGV;

# Every remaining argument must be a known type name.
while (@ARGV > 0) {
    my $type = shift @ARGV;
    usage() if !exists $types{$type};
    $types{$type} = 1;
}

if ($os_rev !~ /^5\.(10|11)/) {
    die "Solaris version $os_rev is not supported.\n";
}

if ($> != 0) {
    die "You must be user root to run this script.\n";
}

# A false third argument makes open3 send the child's stderr to MDB_RD too,
# so mdb diagnostics such as "mdb: instance N unused" can be read below.
open3(*MDB_WRT, *MDB_RD, "", "$mdb -kw") or die "Cannot execute mdb";

# Step 1: softstate pointer of the requested driver instance.
$mdb_resp = mdb_query("*${drv}_state::softstate 0t${inst}");
if ($mdb_resp =~ /^(\p{XDigit}+)$/) {
    $soft_state = $1;
}
elsif ($mdb_resp =~ /^mdb: instance \p{XDigit}+ unused$/) {
    die "ERROR: Instance $inst is unused\n";
}
else {
    print STDERR "ERROR: Reading softstate pointer for instance $inst\n";
    die " Response: $mdb_resp\n";
}

# Step 2: un_errstats member of the sd_lun structure.
$mdb_resp = mdb_query("${soft_state}::print struct sd_lun un_errstats");
if ($mdb_resp =~ /^un_errstats = 0x(\p{XDigit}+)$/) {
    $errstats = $1;
}
else {
    print STDERR "ERROR: Reading un_errstats pointer for softstate $soft_state\n";
    die " Response: $mdb_resp\n";
}

# Step 3: ks_data member of that kstat_t.
$mdb_resp = mdb_query("${errstats}::print kstat_t ks_data");
if ($mdb_resp =~ /^ks_data = 0x(\p{XDigit}+)$/) {
    $ks_data = $1;
}
else {
    print STDERR "ERROR: Reading ks_data pointer for un_errstats $errstats softstate $soft_state\n";
    die " Response: $mdb_resp\n";
}

# "all" resets every counter listed in %trans and finishes.
if ($types{"all"}) {
    foreach my $type (keys %trans) {
        reset_counter($trans{$type});
    }
    exit 0;
}

# "io" is shorthand for hard + soft + tran.
if ($types{"io"}) {
    $types{"hard"} = 1;
    $types{"soft"} = 1;
    $types{"tran"} = 1;
}

foreach my $type (keys %types) {
    next if $type eq "all";
    next if $type eq "io";
    reset_counter($trans{$type}) if $types{$type};
}

exit 0;

# Reset one error counter to zero.
#
# Argument: the struct sd_errstats member name (a value of %trans).
# Reads the file-level $ks_data, $errstats, $soft_state and $inst.
# Dies if the entry's data_type is not 0x2, if the counter address cannot
# be parsed from mdb's reply, or if mdb does not confirm the write of 0.
sub reset_counter {
    my ($counter) = @_;

    # Only entries whose data_type is 0x2 are handled; the value is then
    # accessed through the ui32 member below.
    $mdb_resp = mdb_query("${ks_data}::print struct sd_errstats ${counter}.data_type");
    if ($mdb_resp =~ /^\Q$counter\E\.data_type = (0x\p{XDigit}+)$/) {
        $ks_type = $1;
        if ($ks_type ne "0x2") {
            die "ERROR: Unsupported kstat data type $ks_type for $counter\n";
        }
    }
    else {
        print STDERR "ERROR: Reading data_type value for ks_data $ks_data un_errstats $errstats softstate $soft_state\n";
        die " Response: $mdb_resp\n";
    }

    # "::print -a" prefixes the member with its address, which is what /W needs.
    $mdb_resp = mdb_query("${ks_data}::print -a struct sd_errstats ${counter}.value.ui32");
    if ($mdb_resp =~ /^(\p{XDigit}+) \Q$counter\E\.value\.ui32 = (?:0x)?(\p{XDigit}+)$/) {
        my $kstat_addr = $1;
        my $cur_value  = $2;

        # The captured value is a string of hex digits; convert it with hex()
        # so that values containing a-f are reported correctly.
        printf("Resetting %-15s for instance %5s, current value 0x%x\n",
            $counter, $inst, hex($cur_value));

        # Write a 32-bit zero and check that mdb echoes "... = 0x0".
        $mdb_resp = mdb_query("${kstat_addr}/W 0");
        if ($mdb_resp !~ /^0x${kstat_addr}:\s+(?:0x)?\p{XDigit}+\s+=\s+0x0$/) {
            print STDERR "ERROR: Failed to write kstat counter address $kstat_addr,\n";
            print STDERR " ks_data $ks_data un_errstats $errstats softstate $soft_state\n";
            die " Response: $mdb_resp\n";
        }
    }
    else {
        print STDERR "ERROR: Unable to obtain kstat counter address for $counter reset,\n";
        print STDERR " ks_data $ks_data un_errstats $errstats softstate $soft_state\n";
        die " Response: $mdb_resp\n";
    }
}
You can also download it from here.