Skip to content

Commit a6ed13c

Browse files
committed
Move title_sort to Rust
Also some automated cargo clippy fixes
1 parent d8f2b50 commit a6ed13c

8 files changed

Lines changed: 88 additions & 20 deletions

File tree

lib/bibdata_rs/src/marc/extract_values.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ pub trait ExtractValues<'a> {
55
where
66
C: Fn(&'a Field) -> bool,
77
E: Fn(&'a Field) -> Option<T>;
8+
9+
/// Returns a single extracted value for the fields accepted by `criteria`,
/// or `None` when there is none.
///
/// `criteria` decides which fields are considered and `extractor` turns a
/// field into an optional value.
// NOTE(review): the `&Record` implementation returns the first item of
// `extract_field_values_by`; other implementors should presumably match that — confirm.
fn first_matching_field_value<C, E, T>(self, criteria: C, extractor: E) -> Option<T>
10+
where
11+
C: Fn(&'a Field) -> bool,
12+
E: Fn(&'a Field) -> Option<T>;
813
}
914

1015
impl<'a> ExtractValues<'a> for &'a Record {
@@ -21,4 +26,12 @@ impl<'a> ExtractValues<'a> for &'a Record {
2126
}
2227
})
2328
}
29+
30+
fn first_matching_field_value<C, E, T>(self, criteria: C, extractor: E) -> Option<T>
31+
where
32+
C: Fn(&'a Field) -> bool,
33+
E: Fn(&'a Field) -> Option<T>,
34+
{
35+
self.extract_field_values_by(criteria, extractor).next()
36+
}
2437
}

lib/bibdata_rs/src/marc/identifier.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@ fn linked_record_control_numbers(record: &Record) -> impl Iterator<Item = String
4141
.extract_values("776w:787w")
4242
.into_iter()
4343
.filter_map(|value| {
44-
if is_oclc_number(&value) {
45-
Some(normalize_oclc_number(&value))
44+
if is_oclc_number(value) {
45+
Some(normalize_oclc_number(value))
4646
} else if value.contains('(') {
47-
Some(format!("BIB{}", strip_non_numeric(&value)))
47+
Some(format!("BIB{}", strip_non_numeric(value)))
4848
} else {
4949
None
5050
}

lib/bibdata_rs/src/marc/ruby_bindings.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ fn solr_fields(ruby: &Ruby, record: magnus::RObject) -> Result<RHash, magnus::Er
104104
.ok()
105105
.and_then(|date| date.maybe_to_string());
106106

107-
let hash = ruby.hash_new_capa(36);
107+
let hash = ruby.hash_new_capa(37);
108108
hash.aset("aat_s", ruby.ary_from_iter(genre::aat_s(&record)))?;
109109
hash.aset("action_notes_1display", action_notes_1display)?;
110110
hash.aset("access_restrictions_note_display", access_notes(&record))?;
@@ -188,6 +188,7 @@ fn solr_fields(ruby: &Ruby, record: magnus::RObject) -> Result<RHash, magnus::Er
188188
"standard_no_index",
189189
standard_numbers_for_ruby(ruby, &record),
190190
)?;
191+
hash.aset("title_sort", title::title_sort(&record))?;
191192
hash.aset("title_t", title_t)?;
192193

193194
Ok(hash)

lib/bibdata_rs/src/marc/ruby_bindings/marc_gem.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
//! This module is responsible for converting Marc data from Ruby's Marc gem to
22
//! Rust's marctk
3-
43
use magnus::{
54
Object, RArray, RClass, RObject, RString, Ruby,
65
value::{InnerRef, Lazy, LazyId, ReprValue},

lib/bibdata_rs/src/marc/title.rs

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
1+
use std::borrow::Cow;
2+
13
use crate::marc::{
24
extract_values::ExtractValues,
35
string_normalize::maybe_not_empty,
46
trim_punctuation,
5-
variable_length_field::{SubfieldIterator, latin_or_non_latin_tag_included_in},
7+
variable_length_field::{
8+
SubfieldIterator, join_subfields_by_code, latin_or_non_latin_tag_included_in,
9+
latin_tag_included_in,
10+
},
611
};
712
use itertools::Itertools;
813
use marctk::Record;
@@ -20,7 +25,7 @@ pub fn contains_titles_index(record: &Record) -> impl Iterator<Item = String> {
2025

2126
pub fn latin_script_title(record: &Record) -> Option<String> {
2227
record
23-
.extract_field_values_by(
28+
.first_matching_field_value(
2429
|field| field.tag() == "245",
2530
|field| {
2631
Some(
@@ -33,7 +38,33 @@ pub fn latin_script_title(record: &Record) -> Option<String> {
3338
)
3439
},
3540
)
36-
.next()
41+
}
42+
43+
pub fn title_sort(record: &Record) -> Option<String> {
44+
record.first_matching_field_value(latin_tag_included_in(&["245"]), |field| {
45+
let joined =
46+
join_subfields_by_code(field, &["a", "b", "c", "f", "g", "h", "k", "n", "p", "s"]);
47+
let non_filing_characters = field.ind2().parse::<u8>();
48+
let trimmed = match non_filing_characters {
49+
Ok(non_filing_characters) => {
50+
without_non_filing_characters(&joined, non_filing_characters).to_string()
51+
}
52+
Err(_) => joined,
53+
};
54+
maybe_not_empty(trimmed)
55+
})
56+
}
57+
58+
/// Drops the first `non_filing_characters` characters of `title`.
///
/// The count is measured in `char`s (Unicode scalar values), not bytes. A
/// title with no more than `non_filing_characters` characters yields an
/// empty string.
///
/// The result is always a borrowed suffix of `title`, so nothing is allocated.
fn without_non_filing_characters<'a>(title: &'a str, non_filing_characters: u8) -> Cow<'a, str> {
    let skipped = usize::from(non_filing_characters);
    // `char_indices` reports the byte offset at which each character starts, so
    // the offset of the first kept character is always a valid slice boundary.
    let remainder = title
        .char_indices()
        .nth(skipped)
        .map_or("", |(byte_offset, _)| &title[byte_offset..]);
    Cow::Borrowed(remainder)
}
3869

3970
#[cfg(test)]
@@ -49,4 +80,18 @@ mod tests {
4980
assert_eq!(contains_titles.next(), Some(String::from("زوراء")));
5081
assert_eq!(contains_titles.next(), None);
5182
}
83+
84+
#[test]
fn it_can_find_title_sort() {
    // The second indicator is 4, so the four leading characters "The " are dropped.
    let record = Record::from_breaker(r"=245 \4 $aThe octopus").unwrap();
    let sortable_title = title_sort(&record);
    assert_eq!(sortable_title.as_deref(), Some("octopus"));
}
90+
91+
#[test]
fn it_returns_title_sort_none_if_245_empty() {
    // Subfield a holds a single space and the second indicator is 4.
    let record = Record::from_breaker(r"=245 \4 $a ").unwrap();
    assert!(title_sort(&record).is_none());
}
5297
}

lib/bibdata_rs/src/marc/title/uniform_title.rs

Whitespace-only changes.

lib/bibdata_rs/src/marc/variable_length_field.rs

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ where
3131
}
3232
}
3333

34+
pub fn latin_tag_included_in(tags: &[&str]) -> impl Fn(&Field) -> bool {
35+
|field| tags.contains(&field.tag())
36+
}
37+
3438
pub fn latin_or_non_latin_tag_included_in(tags: &[&str]) -> impl Fn(&Field) -> bool {
3539
|field| tags.contains(&field.tag()) || non_latin_tag_included_in(tags)(field)
3640
}
@@ -53,13 +57,23 @@ pub fn join_all_subfields(field: &Field) -> String {
5357
join_subfields(field.subfields().iter())
5458
}
5559

60+
pub fn join_subfields_by_code(field: &Field, include: &[&str]) -> String {
61+
join_subfields(filter_subfields(field, |subfield| {
62+
include.contains(&subfield.code())
63+
}))
64+
}
65+
5666
/// Joins, via `join_subfields`, every subfield of `field` whose code is NOT
/// listed in `exclude`.
pub fn join_subfields_except(field: &Field, exclude: &[&str]) -> String {
57-
join_subfields(
58-
field
59-
.subfields()
60-
.iter()
61-
.filter(|subfield| !exclude.contains(&subfield.code())),
62-
)
67+
join_subfields(filter_subfields(field, |subfield| {
68+
!exclude.contains(&subfield.code())
69+
}))
70+
}
71+
72+
fn filter_subfields(
73+
field: &Field,
74+
filter: impl Fn(&&Subfield) -> bool,
75+
) -> impl Iterator<Item = &Subfield> {
76+
field.subfields().iter().filter(filter)
6377
}
6478

6579
pub fn join_subfields<'a>(subfields: impl Iterator<Item = &'a Subfield>) -> String {

marc_to_solr/lib/traject_config.rb

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -175,12 +175,8 @@
175175
to_field 'title_vern_display', extract_marc('245abcfghknps', alternate_script: :only, first: true)
176176

177177
# to_field 'title_sort', marc_sortable_title
178-
to_field 'title_sort' do |record, accumulator|
179-
MarcExtractor.cached('245abcfghknps', alternate_script: false).collect_matching_lines(record) do |field, spec, extractor|
180-
str = extractor.collect_subfields(field, spec).first
181-
str = str.slice(field.indicator2.to_i, str.length) if str
182-
accumulator << str if accumulator[0].nil?
183-
end
178+
# Appends the 'title_sort' entry of context.clipboard[:solr_fields] when it is truthy.
# NOTE(review): presumably that hash is filled by the Rust solr_fields binding — confirm.
to_field 'title_sort' do |_record, accumulator, context|
179+
accumulator << context.clipboard[:solr_fields]['title_sort'] if context.clipboard[:solr_fields]['title_sort']
184180
end
185181

186182
to_field 'title_vern_sort' do |record, accumulator|

0 commit comments

Comments
 (0)