Massive speed improvements during import

This commit is contained in:
mysticbbs 2013-04-01 03:29:10 -04:00
parent 13309e8b35
commit 975f4e201e
3 changed files with 126 additions and 71 deletions

View File

@ -407,10 +407,16 @@
; If you want to save duplicated / bad messages to a certain message
; base, then set this value to the *INDEX* of the message base (viewable
; at the top of the msgbase editor). Otherwise, set this value to -1
; or leave it commented out and they will be ignored.
; or leave it commented out and they will be ignored.
; dupe_msg_index = 5
; This defines the number of messages to keep in the duplicate database.
; Each message takes 8 bytes of data, so for example 32,000 messages take
; 256 KB of memory while importing messages. Max is 250,000 messages.
dupe_db_size = 32000
; If you want MUTIL to auto create message bases when it finds an
; echomail message for a non-existing base, set this value to true
; and use the options below to define the default values

View File

@ -14,7 +14,30 @@ Uses
mUtil_Common;
Const
MaxDupeChecks = 40000;
MaxDupeSize = 250000;
Type
// One duplicate-database entry: a pair of Cardinal values that together
// identify a message. Two entries match only when both fields are equal.
// NOTE(review): presumably these are checksums of the message header and
// message text (callers pass PKT.MsgCRC) -- confirm where MsgCRC is filled.
RecMsgDupe = Record
Header : Cardinal;
Text : Cardinal;
End;
// Pointer type used to index the heap-allocated entry buffer as an array.
RecDupePTR = ^RecDupeArray;
// Declared at the maximum size so the pointer can be indexed 1..MaxDupeSize;
// TPKTDupe.Create allocates only MaxDupes entries with GetMem, so indexes
// above MaxDupes must never be touched.
RecDupeArray = Array[1..MaxDupeSize] of RecMsgDupe;
Type
// In-memory duplicate-message database. Entries are loaded from
// echodupes.dat (in bbsConfig.DataPath) by the constructor and written
// back to the same file by the destructor.
TPKTDupe = Class
// Heap buffer holding room for MaxDupes entries.
DupeData : RecDupePTR;
// Capacity in entries; Create caps it at MaxDupeSize.
MaxDupes : Cardinal;
// Index of the slot most recently written (or last slot loaded from disk).
CurDupes : Cardinal;
// Number of valid entries currently held in DupeData.
TotalDupes : Cardinal;
// Allocates the buffer for Max entries and loads saved entries from disk.
Constructor Create (Max: Cardinal);
// Writes the valid entries to disk and releases the buffer.
Destructor Destroy; Override;
// Returns True when an entry with the same Header and Text as D is stored.
Function IsDuplicate (Var D: RecMsgDupe) : Boolean;
// Stores D; once the last slot has been used, writing restarts at slot 1.
Procedure AddDuplicate (Var D: RecMsgDupe);
End;
Const
pktPrivate = $0001;
@ -66,11 +89,6 @@ Type
Filler : Array[1..20] of Char;
End;
RecMsgDupe = Record
Header : Cardinal;
Text : Cardinal;
End;
RecMsgLine = String[79];
TPKTReader = Class
@ -79,7 +97,6 @@ Type
Dest : RecEchoMailAddr;
MsgHdr : RecPKTMessageHdr;
MsgFile : PCharFile;
DupeFile : PCharFile;
MsgTo : String[50];
MsgFrom : String[50];
MsgSubj : String[80];
@ -98,18 +115,86 @@ Type
Function Open (FN: String) : Boolean;
Function GetMessage (NetMail: Boolean) : Boolean;
Function IsDuplicate : Boolean;
Procedure AddDuplicate;
End;
Implementation
// Builds the in-memory duplicate database.
//
// Max is the requested capacity in entries and is capped at MaxDupeSize.
// Saved entries are read from echodupes.dat in bbsConfig.DataPath; when the
// file cannot be opened it is created empty. At most MaxDupes entries are
// loaded, and CurDupes is left pointing at the last loaded entry.
Constructor TPKTDupe.Create (Max: Cardinal);
Var
  DataFile  : File;
  BytesRead : Cardinal;
Begin
  Inherited Create;

  If Max > MaxDupeSize Then
    Max := MaxDupeSize;

  MaxDupes   := Max;
  TotalDupes := 0;
  BytesRead  := 0;

  GetMem (DupeData, MaxDupes * SizeOf(RecMsgDupe));

  Assign (DataFile, bbsConfig.DataPath + 'echodupes.dat');

  // Try to open an existing file with I/O checking off so a missing file
  // is reported through IoResult instead of a runtime error.
  {$I-}
  Reset (DataFile, 1);
  {$I+}

  If IoResult <> 0 Then
    ReWrite (DataFile, 1);

  BlockRead (DataFile, DupeData^, MaxDupes * SizeOf(RecMsgDupe), BytesRead);
  Close     (DataFile);

  // BlockRead reports bytes (record size 1); convert to whole entries.
  // Any trailing partial entry is discarded by the integer division.
  TotalDupes := BytesRead DIV SizeOf(RecMsgDupe);
  CurDupes   := TotalDupes;
End;
// Reports whether D is already present in the duplicate database.
//
// Performs a linear scan over the TotalDupes valid entries and stops at the
// first entry whose Header and Text both equal those of D. Returns False
// when the database is empty or no entry matches.
Function TPKTDupe.IsDuplicate (Var D: RecMsgDupe) : Boolean;
Var
  Slot : Cardinal;
Begin
  Result := False;
  Slot   := 0;

  While (Not Result) and (Slot < TotalDupes) Do Begin
    Inc (Slot);

    Result := (DupeData^[Slot].Header = D.Header) and
              (DupeData^[Slot].Text   = D.Text);
  End;
End;
// Stores D in the duplicate database.
//
// The buffer is used as a ring: after slot MaxDupes has been written, the
// next entry goes into slot 1 again and overwrites what was stored there.
// TotalDupes grows until the buffer is full and then stays at MaxDupes.
// When the capacity is zero there is no slot to write to, so the call is a
// no-op.
Procedure TPKTDupe.AddDuplicate (Var D: RecMsgDupe);
Begin
  // A capacity of 0 (e.g. dupe_db_size = 0) would otherwise still advance
  // to slot 1 and write outside the zero-length allocation.
  If MaxDupes = 0 Then Exit;

  // Wrap around once the last slot has been used. ">=" rather than "="
  // so an out-of-range CurDupes can never index past the buffer.
  If CurDupes >= MaxDupes Then Begin
    TotalDupes := MaxDupes;
    CurDupes   := 0;
  End;

  Inc (CurDupes);

  If TotalDupes < CurDupes Then
    TotalDupes := CurDupes;

  DupeData^[CurDupes] := D;
End;
// Writes the valid entries back to echodupes.dat in bbsConfig.DataPath
// (replacing the file's previous contents) and releases the entry buffer.
//
// The cleanup steps are protected with Try..Finally so that, if an I/O call
// raises, the file is still closed, the buffer is still freed and the
// inherited destructor still runs. The exception itself is not swallowed.
Destructor TPKTDupe.Destroy;
Var
  F : File;
Begin
  Try
    Assign  (F, bbsConfig.DataPath + 'echodupes.dat');
    ReWrite (F, 1);

    Try
      // Only the TotalDupes valid entries are saved, not the whole buffer.
      BlockWrite (F, DupeData^, TotalDupes * SizeOf(RecMsgDupe));
    Finally
      Close (F);
    End;
  Finally
    FreeMem (DupeData, MaxDupes * SizeOf(RecMsgDupe));

    Inherited Destroy;
  End;
End;
Constructor TPKTReader.Create;
Begin
Opened := False;
MsgLines := 0;
MsgFile := New (PCharFile, Init(1024 * 4));
DupeFile := New (PCharFile, Init(1024 * 8));
MsgFile := New (PCharFile, Init(1024 * 16));
End;
Destructor TPKTReader.Destroy;
@ -117,12 +202,8 @@ Begin
DisposeText;
If MsgFile.Opened Then MsgFile.Close;
If DupeFile.Opened Then DupeFile.Close;
Dispose (MsgFile, Done);
Dispose (DupeFile, Done);
// TRIM DUPLICATE FILE HERE
Inherited Destroy;
End;
@ -286,40 +367,4 @@ Begin
Until False;
End;
// Appends this message's MsgCRC record to the end of echodupes.dat in
// bbsConfig.DataPath. The file is opened, written and closed on every call.
Procedure TPKTReader.AddDuplicate;
Var
F: File;
Begin
Assign (F, bbsConfig.DataPath + 'echodupes.dat');
// NOTE(review): ioReset / ioReWrite / fmRWDN are defined outside this view;
// this reads as "open the existing file, otherwise create it" -- confirm.
If Not ioReset (F, 1, fmRWDN) Then
ioReWrite (F, 1, fmRWDN);
// Move to the end of the file so the write below appends.
Seek (F, FileSize(F));
BlockWrite (F, MsgCRC, SizeOf(RecMsgDupe));
Close (F);
End;
// Reports whether this message's MsgCRC is already recorded in
// echodupes.dat. The file is read entry by entry from the start on every
// call; an entry matches only when both Text and Header are equal.
// Returns False when the file cannot be opened or no entry matches.
Function TPKTReader.IsDuplicate : Boolean;
Var
Dupe : RecMsgDupe;
Res : LongInt;
Begin
Result := False;
If Not DupeFile.Open (bbsConfig.DataPath + 'echodupes.dat') Then Exit;
While Not DupeFile.EOF Do Begin
// NOTE(review): Res (presumably the amount actually read) is not checked,
// so a short read would compare a partially filled Dupe -- confirm
// PCharFile.BlockRead semantics.
DupeFile.BlockRead (Dupe, SizeOf(RecMsgDupe), Res);
If (Dupe.Text = MsgCRC.Text) and (Dupe.Header = MsgCRC.Header) Then Begin
Result := True;
Break;
End;
End;
DupeFile.Close;
End;
End.

View File

@ -69,7 +69,9 @@ Var
TotalDupes : LongInt;
EchoNode : RecEchoMailNode;
DupeIndex : LongInt;
DupeMBase : RecMessageBase;
CreateBases : Boolean;
Dupes : TPKTDupe;
Procedure ImportNetMailpacket (ArcFN: String);
Var
@ -152,28 +154,23 @@ Var
End;
While PKT.GetMessage(False) Do Begin
If PKT.IsDuplicate Then Begin
If Dupes.IsDuplicate(PKT.MsgCRC) Then Begin
Log (3, '!', ' Duplicate message found in ' + PKT.MsgArea);
If DupeIndex <> -1 Then Begin
CurTag := ''; // force next real msg to get mbase record
If (MsgBase <> NIL) and (CurTag <> '-DUPEMSG-') Then Begin
MsgBase^.CloseMsgBase;
// TODO for speed:
// load dupe base first before all processing
// add a way to not close/reopen if last was dupe (simple boolean)
Dispose (MsgBase, Done);
If GetMBaseByIndex (DupeIndex, MBase) Then Begin
If MsgBase <> NIL Then Begin
MsgBase^.CloseMsgBase;
Dispose (MsgBase, Done);
MsgBase := NIL;
End;
MessageBaseOpen (MsgBase, MBase);
SavePKTMsgToBase (MsgBase, PKT, False);
MsgBase := NIL;
CurTag := '-DUPEMSG-';
End;
If MsgBase = NIL Then
MessageBaseOpen (MsgBase, DupeMBase);
SavePKTMsgToBase (MsgBase, PKT, False);
End;
Inc (TotalDupes);
@ -253,7 +250,7 @@ Var
SavePKTMsgToBase (MsgBase, PKT, False);
PKT.AddDuplicate;
Dupes.AddDuplicate(PKT.MsgCRC);
Inc (TotalEcho);
@ -272,8 +269,6 @@ Var
PKT.MsgFile.Close;
End;
// PKT.MsgFile.Close;
FileErase (TempPath + DirInfo.Name);
End;
@ -324,6 +319,13 @@ Begin
CreateBases := INI.ReadBoolean(Header_ECHOIMPORT, 'auto_create', False);
DupeIndex := INI.ReadInteger(Header_ECHOIMPORT, 'dupe_msg_index', -1);
Count := INI.ReadInteger(Header_ECHOIMPORT, 'dupe_db_size', 32000);
Dupes := TPKTDupe.Create(Count);
If DupeIndex <> -1 Then
If Not GetMBaseByIndex (DupeIndex, DupeMBase) Then
DupeIndex := -1;
FindFirst (bbsConfig.InboundPath + '*', AnyFile, DirInfo);
@ -367,6 +369,8 @@ Begin
FindClose (DirInfo);
Dupes.Free;
ProcessStatus ('Total |15' + strI2S(TotalEcho) + ' |07echo |15' + strI2S(TotalNet) + ' |07net |15' + strI2S(TotalDupes) + ' |07dupe', True);
ProcessResult (rDONE, True);
End;