/// Search module. /// Copyright: dd86k <dd@dax.moe> /// License: MIT /// Authors: $(LINK2 https://github.com/dd86k, dd86k) module searcher; import std.array : uninitializedArray; import core.stdc.string : memcmp; import editor, error; alias compare = memcmp; // avoids confusion with memcpy/memcmp /// Search buffer size. private enum BUFFER_SIZE = 4 * 1024; /*int data(ref Editor editor, out long pos, const(void) *data, size_t len, bool dir) { }*/ /// Binary search. /// Params: /// pos = Found position. /// data = Needle pointer. /// len = Needle length. /// dir = Search direction. If set, forwards. If unset, backwards. /// Returns: Error code if set. int searchData(out long pos, const(void) *data, size_t len, bool dir) { int function(out long, const(void)*, size_t) F = dir ? &forward : &backward; return F(pos, data, len); } /// Search for binary data forward. /// Params: /// pos = Found position in file. /// data = Data pointer. /// len = Data length. /// Returns: Error code if set. private int forward(out long newPos, const(void) *data, size_t len) { version (Trace) trace("data=%s len=%u", data, len); ubyte *needle = cast(ubyte*)data; /// First byte for data to compare with haystack. const ubyte firstByte = needle[0]; const bool firstByteOnly = len == 1; /// File buffer. ubyte[] fileBuffer = uninitializedArray!(ubyte[])(BUFFER_SIZE); /// Allocated if size is higher than one. /// Used to read file data to compare with needle if needle extends /// across haystack chunks. ubyte[] needleBuffer; if (firstByteOnly == false) needleBuffer = uninitializedArray!(ubyte[])(len); // Setup const long oldPos = editor.position; long pos = oldPos + 1; /// New haystack position editor.seek(pos); version (Trace) trace("start=%u", pos); size_t haystackIndex = void; /// haystack chunk index L_CONTINUE: ubyte[] haystack = editor.read(fileBuffer); const size_t haystackLen = haystack.length; // For every byte for (haystackIndex = 0; haystackIndex < haystackLen; ++haystackIndex) { // Check first byte if (haystack[haystackIndex] != firstByte) continue; // If first byte is indifferent and length is of 1, then // we're done. if (firstByteOnly) goto L_FOUND; // In-haystack or out-haystack check // Determined if needle fits within haystack (chunk) if (haystackIndex + len < haystackLen) // fits inside haystack { if (compare(haystack.ptr + haystackIndex, needle, len) == 0) goto L_FOUND; } else { // needle spans across haystacks const long t = pos + haystackIndex; // temporary seek editor.seek(t); // Go at chunk start + index ubyte[] tc = editor.read(needleBuffer); // Read needle length if (editor.eof) // Already hit past the end with needle length goto L_NOTFOUND; // No more chunks if (compare(tc.ptr, needle, len) == 0) goto L_FOUND; editor.seek(pos); // Negative, return to chunk start } } // Increase (search) position with chunk length. pos += haystackLen; // Check if last haystack. if (editor.eof == false) goto L_CONTINUE; // Not found L_NOTFOUND: editor.seek(oldPos); return errorSet(ErrorCode.notFound); L_FOUND: // Found newPos = pos + haystackIndex; // Position + Chunk index = Found position return 0; } /// Search for binary data backward. /// Params: /// pos = Found position in file. /// data = Data pointer. /// len = Data length. /// Returns: Error code if set. private int backward(out long newPos, const(void) *data, size_t len) { version (Trace) trace("data=%s len=%u", data, len); if (editor.position < 2) return errorSet(ErrorCode.insufficientSpace); ubyte *needle = cast(ubyte*)data; /// First byte for data to compare with haystack. const ubyte lastByte = needle[len - 1]; const bool lastByteOnly = len == 1; /// File buffer. ubyte[] fileBuffer = uninitializedArray!(ubyte[])(BUFFER_SIZE); /// Allocated if size is higher than one. /// Used to read file data to compare with needle if needle extends /// across haystack chunks. ubyte[] needleBuffer; if (lastByteOnly == false) // No need for buffer if needle is a byte needleBuffer = uninitializedArray!(ubyte[])(len); // Setup const long oldPos = editor.position; long pos = oldPos - 1; /// Chunk position editor.seek(pos); version (Trace) trace("start=%u", pos); size_t haystackIndex = void; size_t haystackLen = BUFFER_SIZE; ptrdiff_t diff = void; L_CONTINUE: pos -= haystackLen; // Adjusts buffer size to read chunk if it goes past start of haystack. if (pos < 0) { haystackLen += pos; fileBuffer = uninitializedArray!(ubyte[])(haystackLen); pos = 0; } editor.seek(pos); ubyte[] haystack = editor.read; // For every byte for (haystackIndex = haystackLen; haystackIndex-- > 0;) { // Check last byte if (haystack[haystackIndex] != lastByte) continue; // Fix when needle is one byte diff = 0; // If first byte is indifferent and length is of 1, then // we're done. if (lastByteOnly) goto L_FOUND; // Go at needle diff = haystackIndex - len + 1; // Go at needle[0] // In-haystack or out-haystack check // Determined if needle fits within haystack (chunk) if (diff >= 0) // fits inside haystack { if (compare(haystack.ptr + diff, needle, len) == 0) goto L_FOUND; } else { // needle spans across haystacks editor.seek(pos + diff); // temporary seek ubyte[] tc = editor.read(needleBuffer); // Read needle length if (compare(tc.ptr, needle, len) == 0) goto L_FOUND; editor.seek(pos); // Go back we where last } } // Acts like EOF. if (pos > 0) goto L_CONTINUE; // Not found editor.seek(oldPos); return errorSet(ErrorCode.notFound); L_FOUND: // Found newPos = pos + diff; // Position + Chunk index = Found position return 0; } /// Finds the next position indifferent to specified byte. /// Params: /// data = Byte. /// newPos = Found position. /// Returns: Error code. int searchSkip(ubyte data, out long newPos) { version (Trace) trace("data=0x%x", data); /// File buffer. ubyte[] fileBuffer = uninitializedArray!(ubyte[])(BUFFER_SIZE); size_t haystackIndex = void; const long oldPos = editor.position; long pos = oldPos + 1; L_CONTINUE: editor.seek(pos); ubyte[] haystack = editor.read(fileBuffer); const size_t haystackLen = haystack.length; // For every byte for (haystackIndex = 0; haystackIndex < haystackLen; ++haystackIndex) { if (haystack[haystackIndex] != data) goto L_FOUND; } // Increase (search) position with chunk length. pos += haystackLen; // Check if last haystack. // If haystack length is lower than the default size, // this simply means it's the last haystack since it reached // OEF (by having read less data). if (haystackLen == BUFFER_SIZE) goto L_CONTINUE; // Not found editor.seek(oldPos); return errorSet(ErrorCode.notFound); L_FOUND: // Found newPos = pos + haystackIndex; // Position + Chunk index = Found position return 0; }