Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions src/SIL.Machine/Corpora/CorporaUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,14 @@ public static string MergeVerseRanges(string verse1, string verse2)
return sb.ToString();
}

internal static IEnumerable<(string Id, string FileName)> GetFiles(IEnumerable<string> filePatterns)
internal static IEnumerable<(string Id, string FileName, int PatternIndex)> GetFiles(
IEnumerable<string> filePatterns
)
{
string[] filePatternArray = filePatterns.ToArray();
if (filePatternArray.Length == 1 && File.Exists(filePatternArray[0]))
{
yield return ("*all*", filePatternArray[0]);
yield return ("*all*", filePatternArray[0], 0);
}
else
{
Expand All @@ -89,7 +91,7 @@ public static string MergeVerseRanges(string verse1, string verse2)
string filePattern = filePatternArray[i];
if (File.Exists(filePattern))
{
yield return (i.ToString(CultureInfo.InvariantCulture), filePattern);
yield return (i.ToString(CultureInfo.InvariantCulture), filePattern, i);
continue;
}

Expand Down Expand Up @@ -145,7 +147,7 @@ public static string MergeVerseRanges(string verse1, string verse2)
if (sb.Length > 0)
id = sb.ToString();
}
yield return (id, fileName);
yield return (id, fileName, i);
}
}
}
Expand Down
30 changes: 15 additions & 15 deletions src/SIL.Machine/Corpora/FileParatextProjectFileHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,37 +14,37 @@ public FileParatextProjectFileHandler(string projectDir)

public bool Exists(string fileName)
{
return Directory
.EnumerateFiles(_projectDir)
.Any(f => Path.GetFileName(f).Equals(fileName, System.StringComparison.InvariantCultureIgnoreCase));
return GetFileName(fileName) != null;
}

public Stream Open(string fileName)
{
return File.OpenRead(
Path.Combine(
_projectDir,
Directory
.EnumerateFiles(_projectDir)
.FirstOrDefault(f =>
Path.GetFileName(f).Equals(fileName, System.StringComparison.InvariantCultureIgnoreCase)
)
)
);
fileName = GetFileName(fileName) ?? fileName;
return File.OpenRead(Path.Combine(_projectDir, fileName));
}

public UsfmStylesheet CreateStylesheet(string fileName)
{
string customStylesheetFileName = Path.Combine(_projectDir, "custom.sty");
string customStylesheetFileName = GetFileName("custom.sty");
return new UsfmStylesheet(
fileName,
File.Exists(customStylesheetFileName) ? customStylesheetFileName : null
customStylesheetFileName != null ? Path.Combine(_projectDir, customStylesheetFileName) : null
);
}

/// <summary>
/// Returns the first entry of the project directory whose name ends with the given extension.
/// </summary>
/// <param name="extension">Suffix appended to a <c>*</c> wildcard to form the search pattern.</param>
/// <returns>
/// The first path yielded by <see cref="Directory.EnumerateFiles(string, string)"/>,
/// or <c>null</c> when nothing matches.
/// </returns>
public string Find(string extension)
{
    string searchPattern = "*" + extension;
    var matches = Directory.EnumerateFiles(_projectDir, searchPattern);
    return matches.FirstOrDefault();
}

/// <summary>
/// Resolves a file name to the actual name of a file in the project directory,
/// ignoring case differences.
/// </summary>
/// <param name="caseInsensitiveFileName">The file name to look for, in any casing.</param>
/// <returns>
/// The name (without directory) of the first file in the project directory that matches,
/// or <c>null</c> when no file matches.
/// </returns>
private string GetFileName(string caseInsensitiveFileName)
{
    foreach (string path in Directory.EnumerateFiles(_projectDir))
    {
        // Compare on the bare file name only; the enumerated entries include the directory.
        string candidate = Path.GetFileName(path);
        if (candidate.Equals(caseInsensitiveFileName, System.StringComparison.InvariantCultureIgnoreCase))
            return candidate;
    }

    return null;
}
}
}
30 changes: 30 additions & 0 deletions src/SIL.Machine/Corpora/KeyTerm.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
using System.Collections.Generic;
using System.Linq;
using SIL.Scripture;

/// <summary>
/// Immutable record of a key term: its identifying fields plus snapshots of its
/// renderings, references and rendering patterns taken at construction time.
/// </summary>
public class KeyTerm
{
    /// <summary>The identifier of the term.</summary>
    public string Id { get; }

    /// <summary>The category the term was created with.</summary>
    public string Category { get; }

    /// <summary>The domain the term was created with.</summary>
    public string Domain { get; }

    /// <summary>The renderings of the term, copied from the constructor argument.</summary>
    public IReadOnlyList<string> Renderings { get; }

    /// <summary>The verse references of the term, copied from the constructor argument.</summary>
    public IReadOnlyList<VerseRef> References { get; }

    /// <summary>The rendering patterns of the term, copied from the constructor argument.</summary>
    public IReadOnlyList<string> RenderingsPatterns { get; }

    /// <summary>
    /// Creates a key term, eagerly copying each supplied sequence into an array so that
    /// later changes to the source sequences do not affect this instance.
    /// </summary>
    /// <param name="id">The identifier of the term.</param>
    /// <param name="category">The category of the term.</param>
    /// <param name="domain">The domain of the term.</param>
    /// <param name="renderings">The renderings of the term; must not be <c>null</c>.</param>
    /// <param name="references">The verse references of the term; must not be <c>null</c>.</param>
    /// <param name="renderingsPatterns">The rendering patterns of the term; must not be <c>null</c>.</param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="renderings"/>, <paramref name="references"/> or
    /// <paramref name="renderingsPatterns"/> is <c>null</c>.
    /// </exception>
    public KeyTerm(
        string id,
        string category,
        string domain,
        IEnumerable<string> renderings,
        IEnumerable<VerseRef> references,
        IEnumerable<string> renderingsPatterns
    )
    {
        // Validate up front so the exception names the caller's argument rather than
        // the "source" parameter of Enumerable.ToArray.
        if (renderings == null)
            throw new System.ArgumentNullException(nameof(renderings));
        if (references == null)
            throw new System.ArgumentNullException(nameof(references));
        if (renderingsPatterns == null)
            throw new System.ArgumentNullException(nameof(renderingsPatterns));

        Id = id;
        Category = category;
        Domain = domain;
        Renderings = renderings.ToArray();
        References = references.ToArray();
        RenderingsPatterns = renderingsPatterns.ToArray();
    }
}
13 changes: 9 additions & 4 deletions src/SIL.Machine/Corpora/NParallelTextCorpus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,8 @@ private IEnumerable<NParallelTextRow> CreateRows(
throw new ArgumentNullException("A corpus row must be specified.");

object[] defaultRefs = new object[] { rows.Where(r => r != null).Select(r => r.Ref).First() };
TextRowContentType contentType = TextRowContentType.Segment;

string textId = null;
object[][] refs = new object[N][];
TextRowFlags[] flags = new TextRowFlags[N];
Expand Down Expand Up @@ -327,7 +329,7 @@ private IEnumerable<NParallelTextRow> CreateRows(
}
refs = refs.Select(r => r ?? defaultRefs).ToArray();

yield return new NParallelTextRow(textId, refs)
yield return new NParallelTextRow(textId, refs, contentType)
{
NSegments = rows.Select(r => r?.Segment ?? Array.Empty<string>()).ToArray(),
NFlags = flags.ToReadOnlyList()
Expand Down Expand Up @@ -441,6 +443,7 @@ private class RangeRow
public bool IsSentenceStart { get; set; } = false;
public bool IsInRange => Refs.Count > 0;
public bool IsEmpty => Segment.Count == 0;
public TextRowContentType ContentType { get; set; } = TextRowContentType.Segment;
}

private class NRangeInfo
Expand All @@ -451,6 +454,7 @@ private class NRangeInfo
public IComparer<object> RowRefComparer { get; set; } = null;
public List<RangeRow> Rows { get; }
public bool IsInRange => Rows.Any(r => r.IsInRange);
public TextRowContentType ContentType { get; set; } = TextRowContentType.Segment;

public NRangeInfo(int n)
{
Expand All @@ -472,6 +476,7 @@ public void AddTextRow(TextRow row, int index)
}
TextId = row.TextId;
Rows[index].Refs.Add(row.Ref);
Rows[index].ContentType = row.ContentType;
if (Rows[index].IsEmpty)
Rows[index].IsSentenceStart = row.IsSentenceStart;
Rows[index].Segment.AddRange(row.Segment);
Expand All @@ -486,8 +491,8 @@ public NParallelTextRow CreateRow()
.ToList();
foreach (int i in Enumerable.Range(0, Rows.Count))
{
var row = Rows[i];

RangeRow row = Rows[i];
ContentType = row.ContentType;
if (Versifications.All(v => v != null) && row.Refs.Count() == 0)
{
refs[i] = referenceRefs
Expand All @@ -502,7 +507,7 @@ public NParallelTextRow CreateRow()
refs[i] = row.Refs.ToArray();
}
}
var nParRow = new NParallelTextRow(TextId, refs)
var nParRow = new NParallelTextRow(TextId, refs, ContentType)
{
NSegments = Rows.Select(r => r.Segment.ToArray()).ToArray(),
NFlags = Rows.Select(r => r.IsSentenceStart ? TextRowFlags.SentenceStart : TextRowFlags.None)
Expand Down
13 changes: 11 additions & 2 deletions src/SIL.Machine/Corpora/NParallelTextRow.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ namespace SIL.Machine.Corpora
{
public class NParallelTextRow : IRow
{
public NParallelTextRow(string textId, IEnumerable<IReadOnlyList<object>> nRefs)
public NParallelTextRow(
string textId,
IEnumerable<IReadOnlyList<object>> nRefs,
TextRowContentType contentType = TextRowContentType.Segment
)
{
if (string.IsNullOrEmpty(textId))
throw new ArgumentNullException(nameof(textId));
Expand All @@ -21,6 +25,7 @@ public NParallelTextRow(string textId, IEnumerable<IReadOnlyList<object>> nRefs)
N = NRefs.Count;
NSegments = Enumerable.Range(0, N).Select(_ => Array.Empty<string>()).ToImmutableArray();
NFlags = Enumerable.Range(0, N).Select(_ => TextRowFlags.SentenceStart).ToImmutableArray();
ContentType = contentType;
}

public string TextId { get; }
Expand All @@ -32,6 +37,7 @@ public NParallelTextRow(string textId, IEnumerable<IReadOnlyList<object>> nRefs)

public IReadOnlyList<IReadOnlyList<string>> NSegments { get; set; }
public IReadOnlyList<TextRowFlags> NFlags { get; set; }
public TextRowContentType ContentType { get; }

public bool IsSentenceStart(int i) =>
NFlags.Count > i ? NFlags[i].HasFlag(TextRowFlags.SentenceStart) : throw new ArgumentOutOfRangeException();
Expand All @@ -48,7 +54,10 @@ public bool IsRangeStart(int i) =>

public NParallelTextRow Invert()
{
return new NParallelTextRow(TextId, NRefs.Reverse()) { NFlags = NFlags.Reverse().ToImmutableArray(), };
return new NParallelTextRow(TextId, NRefs.Reverse(), ContentType)
{
NFlags = NFlags.Reverse().ToImmutableArray(),
};
}
}
}
3 changes: 2 additions & 1 deletion src/SIL.Machine/Corpora/ParallelTextCorpus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ public override IEnumerable<ParallelTextRow> GetRows(IEnumerable<string> textIds
yield return new ParallelTextRow(
nRow.TextId,
nRow.NRefs[0].Count > 0 || !isScripture ? nRow.NRefs[0] : new object[] { nRow.Ref },
nRow.NRefs[1].Count > 0 || !isScripture ? nRow.NRefs[1] : new object[] { nRow.Ref }
nRow.NRefs[1].Count > 0 || !isScripture ? nRow.NRefs[1] : new object[] { nRow.Ref },
nRow.ContentType
)
{
SourceFlags = nRow.NFlags[0],
Expand Down
11 changes: 9 additions & 2 deletions src/SIL.Machine/Corpora/ParallelTextRow.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@ namespace SIL.Machine.Corpora
{
public class ParallelTextRow : IRow
{
public ParallelTextRow(string textId, IReadOnlyList<object> sourceRefs, IReadOnlyList<object> targetRefs)
public ParallelTextRow(
string textId,
IReadOnlyList<object> sourceRefs,
IReadOnlyList<object> targetRefs,
TextRowContentType contentType = TextRowContentType.Segment
)
{
if (string.IsNullOrEmpty(textId))
throw new ArgumentNullException(nameof(textId));
Expand All @@ -17,6 +22,7 @@ public ParallelTextRow(string textId, IReadOnlyList<object> sourceRefs, IReadOnl
TextId = textId;
SourceRefs = sourceRefs;
TargetRefs = targetRefs;
ContentType = contentType;
}

public string TextId { get; }
Expand All @@ -36,6 +42,7 @@ public ParallelTextRow(string textId, IReadOnlyList<object> sourceRefs, IReadOnl

public TextRowFlags SourceFlags { get; set; } = TextRowFlags.SentenceStart;
public TextRowFlags TargetFlags { get; set; } = TextRowFlags.SentenceStart;
public TextRowContentType ContentType { get; }

public bool IsSourceSentenceStart => SourceFlags.HasFlag(TextRowFlags.SentenceStart);
public bool IsSourceInRange => SourceFlags.HasFlag(TextRowFlags.InRange);
Expand All @@ -51,7 +58,7 @@ public ParallelTextRow(string textId, IReadOnlyList<object> sourceRefs, IReadOnl

public ParallelTextRow Invert()
{
return new ParallelTextRow(TextId, TargetRefs, SourceRefs)
return new ParallelTextRow(TextId, TargetRefs, SourceRefs, ContentType)
{
SourceSegment = TargetSegment,
TargetSegment = SourceSegment,
Expand Down
11 changes: 7 additions & 4 deletions src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ public ParatextBackupTermsCorpus(
{
using (var archive = ZipFile.OpenRead(fileName))
{
IEnumerable<(string, IReadOnlyList<string>)> glosses = new ZipParatextProjectTermsParser(archive)
IEnumerable<KeyTerm> keyTerms = new ZipParatextProjectTermsParser(archive)
.Parse(termCategories, useTermGlosses, chapters)
.OrderBy(g => g.TermId);
.OrderBy(g => g.Id);

ParatextProjectSettings settings = ZipParatextProjectSettingsParser.Parse(archive);

Expand All @@ -26,8 +26,11 @@ public ParatextBackupTermsCorpus(

IText text = new MemoryText(
textId,
glosses.SelectMany(kvp =>
kvp.Item2.Select(gloss => new TextRow(textId, kvp.Item1) { Segment = new string[] { gloss } })
keyTerms.SelectMany(keyTerm =>
keyTerm.Renderings.Select(gloss => new TextRow(textId, keyTerm.Id, TextRowContentType.Word)
{
Segment = new string[] { gloss }
})
)
);
AddText(text);
Expand Down
Loading
Loading