001    /*
002     * Copyright (c) 2003, The Regents of the University of California, through
003     * Lawrence Berkeley National Laboratory (subject to receipt of any required
004     * approvals from the U.S. Dept. of Energy). All rights reserved.
005     */
006    package gov.lbl.dsd.sea.nio.util;
007    
008    import java.io.IOException;
009    
010    /**
011     * Encoder/Decoder implementing Consistent Overhead Byte Stuffing (COBS) for
012     * efficient, reliable, unambigous packet framing regardless of packet content,
013     * making it is easy for applications to recover from malformed packet payloads.
014     * <p>
015     * For details, see the <a
016     * href="http://www.stuartcheshire.org/papers/COBSforToN.pdf">paper </a>. In
017     * case the link is broken, get it from the <a
018     * href="http://www.stuartcheshire.org">paper's author </a>.
019     * <p>
020     * Quoting from the paper: "When packet data is sent over any serial medium, a
021     * protocol is needed by which to demarcate packet boundaries. This is done by
022     * using a special bit-sequence or character value to indicate where the
023     * boundaries between packets fall. Data stuffing is the process that transforms
024     * the packet data before transmission to eliminate any accidental occurrences
025     * of that special framing marker, so that when the receiver detects the marker,
026     * it knows, without any ambiguity, that it does indeed indicate a boundary
027     * between packets.
028     * <p>
029     * COBS takes an input consisting of bytes in the range [0,255] and produces an
030     * output consisting of bytes only in the range [1,255]. Having eliminated all
031     * zero bytes from the data, a zero byte can now be used unambiguously to mark
032     * boundaries between packets.
033     * <p>
034     * This allows the receiver to synchronize reliably with the beginning of the
035     * next packet, even after an error. It also allows new listeners to join a
036     * broadcast stream at any time and without failing to receive and decode the
037     * very next error free packet.
038     * <p>
039     * With COBS all packets up to 254 bytes in length are encoded with an overhead
040     * of exactly one byte. For packets over 254 bytes in length the overhead is at
041     * most one byte for every 254 bytes of packet data. The maximum overhead is
042     * therefore roughly 0.4% of the packet size, rounded up to a whole number of
043     * bytes. COBS encoding has low overhead (on average 0.23% of the packet size,
044     * rounded up to a whole number of bytes) and furthermore, for packets of any
045     * given length, the amount of overhead is virtually constant, regardless of the
046     * packet contents."
047     * <p>
048     * This class implements the original COBS algorithm, not the COBS/ZPE variant.
049     * <p>
050     * There holds: <code>decode(encode(src)) = src</code>.
051     * <p>
052     * Performance Note: The JDK 1.5 server VM runs <code>decode(encode(src))</code>
053     * at about 125 MB/s throughput on a commodity PC (2 GHz Pentium 4). Encoding is
054     * the bottleneck, decoding is extremely cheap. Obviously, this is way more
055     * efficient than Base64 encoding or similar application level byte stuffing
056     * mechanisms.
057     * 
058     * @author whoschek@lbl.gov
059     * @author $Author: hoschek3 $
060     * @version $Revision: 1.3 $, $Date: 2004/08/08 05:10:17 $
061     */
062    public class COBSCodec {
063    
064            protected COBSCodec() {} // not instantiable
065    
066            /**
067             * Returns the encoded representation of the given bytes.
068             * Inefficient method, but easy to use and understand.
069             * 
070             * @param src the bytes to encode
071             * @return the encoded bytes.
072             */
073            public static byte[] encode(byte[] src) {
074                    ArrayByteList dest = new ArrayByteList(maxEncodedSize(src.length));
075                    encode(src, 0, src.length, dest);
076                    dest.trimToSize();
077                    return dest.asArray();
078            }
079    
080            /**
081             * Adds (appends) the encoded representation of the range <code>src[from..to)</code> 
082             * to the given destination list.
083             * 
084             * @param src the bytes to encode
085             * @param from the first byte to encode (inclusive)
086             * @param to the last byte to encode (exclusive)
087             * @param dest the destination list to append to
088             */
089            public static void encode(byte[] src, int from, int to, ArrayByteList dest) {
090                    checkRange(from, to, src);
091                    dest.ensureCapacity(dest.size() + maxEncodedSize(to - from)); // for performance ensure add() will never need to expand list
092                    int code = 1; // can't use unsigned byte arithmetic...
093                    int blockStart = -1;
094                    
095                    // find zero bytes, then use bulk copy for best Java performance (unlike in C):
096                    while (from < to) {
097                            if (src[from] == 0) {
098                                    finishBlock(code, src, blockStart, dest, from - blockStart);
099                                    code = 1;
100                                    blockStart = -1;
101                            } 
102                            else {
103                                    if (blockStart < 0) blockStart = from;
104                                    code++;
105                                    if (code == 0xFF) {
106                                            finishBlock(code, src, blockStart, dest, from - blockStart + 1);
107                                            code = 1;
108                                            blockStart = -1;
109                                    }
110                            }
111                            from++;
112                    }
113    
114                    finishBlock(code, src, blockStart, dest, from - blockStart);
115            }
116    
117            private static void finishBlock(int code, byte[] src, int blockStart, ArrayByteList dest, int length) {
118                    dest.add((byte) code);
119                    if (blockStart >= 0) dest.add(src, blockStart, length);
120            }
121    
122            /**
123             * Returns the maximum amount of bytes an ecoding of <code>size</code> bytes takes in the worst case.
124             */
125            public static int maxEncodedSize(int size) {
126                    return size + 1 + size / 254;
127            }
128            
129            /**
130             * Returns the decoded representation of the given bytes.
131             * Inefficient method, but easy to use and understand.
132             * 
133             * @param src the bytes to decode
134             * @return the decoded bytes.
135             */
136            public static byte[] decode(byte[] src) throws IOException {
137                    ArrayByteList dest = new ArrayByteList(src.length);
138                    decode(src, 0, src.length, dest);
139                    dest.trimToSize();
140                    return dest.asArray();
141            }
142    
143            /**
144             * Adds (appends) the decoded representation of the range <code>src[from..to)</code> 
145             * to the given destination list.
146             * 
147             * @param src the bytes to decode
148             * @param from the first byte to decode (inclusive)
149             * @param to the last byte to decode (exclusive)
150             * @param dest the destination list to append to
151             * @throws IOException if src data is corrupt (encoded erroneously)
152             */
153            public static void decode(byte[] src, int from, int to, ArrayByteList dest) throws IOException {
154                    checkRange(from, to, src);
155                    dest.ensureCapacity(dest.size() + (to-from)); // for performance ensure add() will never need to expand list
156    
157                    while (from < to) {
158                            int code = src[from++] & 0xFF;
159                            int len = code - 1;
160                            if (code == 0 || from + len > to) throw new IOException("Corrupt COBS encoded data - bug in remote encoder?");
161                            dest.add(src, from, len);
162                            from += len;
163                            if (code < 0xFF && from < to) { // unnecessary to write last zero (is implicit anyway)
164                                    dest.add((byte) 0);
165                            }
166                    }
167            }
168    
169            /**
170             * Checks if the given range is within the contained array's bounds.
171             */
172            private static void checkRange(int from, int to, byte[] arr) {
173                    if (from < 0 || from > to || to > arr.length)
174                            throw new IndexOutOfBoundsException("from: " + from + ", to: "
175                                                    + to + ", size: " + arr.length);
176            }
177    
178    }