001package org.unix4j.unix.sed;
002
003import org.unix4j.processor.LineProcessor;
004
005abstract class AbstractSedProcessor implements LineProcessor {
006        protected final Command command;
007        protected final SedArguments args;
008        protected final LineProcessor output;
009
010        public AbstractSedProcessor(Command command, SedArguments args, LineProcessor output) {
011                this.command = command;
012                this.args = args;
013                this.output = output;
014        }
015
016        @Override
017        public void finish() {
018                output.finish();
019        }
020
021        /**
022         * Returns the regexp operand from args, either called "regexp" or
023         * "string1". If none of the two is set, an empty string is returned.
024         * 
025         * @param args
026         *            the args with operand values
027         * @return the regexp argument from "regexp" or "string1" or an empty string
028         *         of none of the two operands is set
029         */
030        protected static String getRegexp(SedArguments args) {
031                if (args.isRegexpSet()) {
032                        return args.getRegexp();
033                }
034                if (args.isString1Set()) {
035                        return args.getString1();
036                }
037                return "";
038        }
039
040        /**
041         * Returns the replacement operand from args, either called "replacement" or
042         * "string2". If none of the two is set, an empty string is returned.
043         * 
044         * @param args
045         *            the args with operand values
046         * @return the replacement argument from "replacement" or "string2" or an
047         *         empty string of none of the two operands is set
048         */
049        protected static String getReplacement(SedArguments args) {
050                if (args.isReplacementSet()) {
051                        return args.getReplacement();
052                }
053                if (args.isString2Set()) {
054                        return args.getString2();
055                }
056                return "";
057        }
058
059        /**
060         * Returns the index of the next delimiter in the given sed script. The
061         * character at {@code indexOfPreviousDelimiter} is taken as delimiter. The
062         * method handles escaped delimiters and returns -1 if no further delimiter
063         * is found.
064         * 
065         * @param script
066         *            the script to analyze
067         * @param indexOfPreviousDelimiter
068         *            the index of the previous delimiter
069         * @return the index of the next delimiter after
070         *         {@code indexOfPreviousDelimiter}, or -1 if no further delimiter
071         *         exists of if {@code indexOfNextDelimiter < 0}
072         */
073        protected static int indexOfNextDelimiter(String script, int indexOfPreviousDelimiter) {
074                if (indexOfPreviousDelimiter < 0 || script.length() <= indexOfPreviousDelimiter) {
075                        return -1;
076                }
077                final char delim = script.charAt(indexOfPreviousDelimiter);
078                if (delim == '\\') {
079                        throw new IllegalArgumentException("invalid delimiter '\\' in sed script: " + script);
080                }
081                int index = indexOfPreviousDelimiter;
082                do {
083                        index = script.indexOf(delim, index + 1);
084                } while (index >= 0 && isEscaped(script, index));
085                return index;
086        }
087
088        private static boolean isEscaped(String script, int index) {
089                int backslashCount = 0;
090                index--;
091                while (index >= 0 && script.charAt(index) == '\\') {
092                        backslashCount++;
093                        index--;
094                }
095                // an uneven count of backslashes means that the character at position
096                // index is escaped (an even count of backslashes escapes backslashes)
097                return backslashCount % 2 == 1;
098        }
099}