#! /usr/common/bin/gawk -f
#
# usage:  testmore.awk EIS.file > NEW.file
# or   :  testmore.awk EIS.file > /dev/null
#
# - this is version 1.3 ignoring obsolete keywords "huge" / "look":
# - testmore ignores dead or dupe sequences (i.e. %K not modified)
# - testmore forces (removes) "bref" for less (more) than 4 terms
# - testmore ignores "more" if %K contains "hard", "obsc", or "unkn"
# - testmore removes "more" from remaining %K containing "full"
# - testmore removes "more" from %K if the length of all terms + X
#   is greater than 200, where X is the length of the last term
# - testmore COULD add "more" if %U is empty and no term is huge,
#   but this code is still disabled...
# - all errors, changes, and unsolved problems reported in LOGFILE,
#   see below BEGIN (use e.g. file ./report_more or /dev/tty etc.)
#
# Global state used by the rules below:
#   ID       $2 of the current %I line; "" means "copy lines unchanged
#            until the next %I" (set after an error or after a %K)
#   S T U    $3 of the %S / %T / %U line of the current entry
#   V W X    $3 of the %V / %W / %X line of the current entry
#   MORE     terms of %S %T %U, or of %V %W %X once a %V was seen
#   HUGE     1 if some comma separated term in MORE exceeds 30 chars
#   NN       number of modifications made to %K lines
#   MM       number of %K lines where "+more" would have applied
#
# Every input line is copied to standard output; only %K lines may be
# rewritten before they are printed.

#### report error #################################################
# Log the first 56 characters of the offending line BAD together with
# its input line number.  Returns "" so that the caller can assign the
# result to ID, which suspends processing until the next %I line.

function ERROR( BAD )
{
    print "ignored line (" NR "): " substr( BAD, 1, 56 ) > LOGFILE
    return ""                   # caller resets bad ID
}

#### LOGFILE ######################################################

BEGIN {
    LOGFILE = "report_more"     # keep error messages
}

#### note ID ######################################################
# A %I line starts a new entry: forget all term lines seen so far.

/^%I A/ {
    S = "" ; T = "" ; U = ""
    V = "" ; W = "" ; X = ""
    ID = $2
    print ; next
}

#### wait for %I ##################################################

ID == "" {
    print ; next                # after error or %K
}

#### test ID ######################################################
# The second field of every line must repeat the ID of the last %I.

ID != $2 {
    ID = ERROR( $0 )            # report missing %I
    print ; next
}

#### check S..X ###################################################
# Each of these lines may occur once per entry, must not have a 4th
# field, and must follow its predecessor (%S -> %T -> %U and
# %S -> %V -> %W -> %X).  These rules do not print; a valid line
# falls through to "note terms", an invalid one is copied by the
# ID == "" rule that follows the checks.

/^%S A/ {
    if ( S == "" && $4 == "" )
        S = $3
    else
        ID = ERROR( $0 )        # accept only one %S
}

/^%T A/ {
    if ( T == "" && $4 == "" && S != "" )
        T = $3
    else
        ID = ERROR( $0 )        # only one %T after %S
}

/^%U A/ {
    if ( U == "" && $4 == "" && T != "" )
        U = $3
    else
        ID = ERROR( $0 )        # only one %U after %T
}

/^%V A/ {
    if ( V == "" && $4 == "" && S != "" )
        V = $3
    else
        ID = ERROR( $0 )        # only one %V after %S
}

/^%W A/ {
    if ( W == "" && $4 == "" && V != "" )
        W = $3
    else
        ID = ERROR( $0 )        # only one %W after %V
}

/^%X A/ {
    if ( X == "" && $4 == "" && W != "" )
        X = $3
    else
        ID = ERROR( $0 )        # only one %X after %W
}

/^%K A/ {
    if ( S == "" || $3 == "" || MORE == "" )
        ID = ERROR( $0 )        # reject spurious %K
}

ID == "" {
    print ; next                # copy erroneous line
}

#### note terms ###################################################
# %S and %V start a fresh term string, the other lines append to it,
# so MORE ends up holding %S %T %U or %V %W %X.

/^%[S-X] A/ {
    if ( $1 == "%S" || $1 == "%V" )
        MORE = $3
    else
        MORE = MORE $3
    print ; next                # copy %S ... %X lines
}

#### skip dead ####################################################

/^%K A.*d(ead|upe)/ {
    ID = ""                     # reset skipped ID
    print ; next
}

#### test %K "bref" ###############################################
# This rule does not print: the (possibly modified) %K line falls
# through to the rules below, each of which ends with print ; next.

/^%K A.*/ {
    HUGE = 0
    NTERMS = split( MORE, TERM, "," )

    # note any HUGE term for the +more logic below
    for ( I = 1 ; HUGE == 0 && I <= NTERMS ; I++ )
        HUGE = ( 30 < length( TERM[ I ] ))

    POS = index( $0, "bref" )   # 0 if the line has no "bref"

    if ( POS > 0 && NTERMS > 4 ) {      # remove _all_ "bref"
        gsub( /bref,/, "" ) ; NN++
        sub( /,bref/, "" ) ; sub( /bref/, "" )
        print $1, $2 ": -bref (" NTERMS ")" > LOGFILE
    }
    if ( POS < 1 && NTERMS < 4 ) {      # force "bref" if < 4
        $3 = ( $3 == "" ? "bref" : $3 ",bref" )
        gsub( /,,/, "," ) ; NN++
        print $1, $2 ": +bref (" NTERMS ")" > LOGFILE
    }
}

#### skip hard etc. ###############################################

/^%K A.*(hard|obsc|unkn)/ {
    ID = ""                     # reset skipped ID
    print ; next
}

#### test %K "full" ###############################################

/^%K A.*full/ {
    if ( 0 < index( $0, "more" )) {
        gsub( /more,/, "" ) ; NN++
        sub( /,more/, "" ) ; sub( /more/, "" )
        print $1, $2 ": -more (full)" > LOGFILE
    }
    ID = ""
    print ; next
}

#### test %K "more" ###############################################

/^%K A.*/ {
    # LAST becomes 1 + the length of the last term, i.e. of the text
    # after the final ",<integer>" match; without such a match the
    # whole of MORE is counted.
    LAST = 1 + match( MORE, /,-?[0-9]+$/ )
    LAST = 1 + length( substr( MORE, LAST ))
    LEN  = length( MORE )
    POS  = index( $0, "more" )  # position of "more", 0 if absent

    if ( POS > 0 && LEN + LAST > 200 ) {
        gsub( /more,/, "" ) ; NN++
        sub( /,more/, "" ) ; sub( /more/, "" )
        print $1, $2 ": -more " LEN, LAST > LOGFILE
    }

    # count the lines where "more" could be added ...
    if ( POS < 1 && U == "" && ! HUGE )
        MM++

    # ... but adding it is still disabled: index() never returns a
    # negative value, so this condition cannot be true.
    if ( POS < 0 && U == "" && ! HUGE ) {
        $3 = ( $3 == "" ? "more" : $3 ",more" )
        gsub( /,,/, "," ) ; NN++
        print $1, $2 ": +more " LEN, LAST > LOGFILE
    }

    ID = ""
    print ; next
}

#### wait for %K ##################################################

{
    print ; next                # copy %N etc.
}

#### report number of patched sequences ###########################
# %d prints 0 for a counter that was never incremented, so the
# counters need not be modified just to force a numeric value.

END {
    printf "%d modifications\n", NN > LOGFILE
    printf "%d +more skipped\n", MM > LOGFILE
    if ( LOGFILE == "/dev/tty" ) exit

    print "see report in " LOGFILE ":" > "/dev/tty"
    printf "%d modifications\n", NN > "/dev/tty"
    printf "%d +more skipped\n", MM > "/dev/tty"
}