Skip to content

Commit 6f6ec23

Browse files
wezellclaude
andauthored
feat(identifier): denormalize base_type onto identifier table (#35164)
## Summary - Adds `base_type INT4` column + `idx_identifier_base_type` index to the `identifier` table, mirroring `structure.structuretype` - `Identifier` bean, `IdentifierFactoryImpl`, and `IdentifierTransformer` updated to read/write `base_type` on every save - `Task260331AddBaseTypeColumnToIdentifier` adds the column and index at startup (DDL only — fast), then schedules the backfill job - `PopulateIdentifierBaseTypeJob` / `PopulateIdentifierBaseTypeUtil` backfill existing rows in batches of 500 (250 ms sleep between batches), fire immediately at startup, and self-unschedule when complete - `postgres.sql` updated for fresh installs ## Test plan - [ ] `Task260331AddBaseTypeColumnToIdentifierTest` — column/index created, idempotent, correct metadata - [ ] `PopulateIdentifierBaseTypeUtilTest` — sets correct base type per content type, ignores folders (null asset_subtype), returns 0 on second run - [ ] `PopulateIdentifierBaseTypeJobTest` — schedules, skips when no work pending, removes itself - [ ] `IdentifierFactoryTest` (5 new methods) — base_type persisted for CONTENT and FILEASSET, survives re-save, transformer maps it, folders get null fixes #35162 --------- Co-authored-by: Claude Sonnet 4.6 <[email protected]>
1 parent 1007280 commit 6f6ec23

14 files changed

Lines changed: 1090 additions & 3 deletions

dotCMS/src/main/java/com/dotmarketing/beans/Identifier.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ public Identifier(final String id) {
6161
private String owner;
6262
private Date createDate;
6363
private String assetSubType;
64+
private Integer baseType;
6465

6566
/**
6667
* @deprecated As of 2016-05-16, replaced by {@link #getId()}
@@ -298,6 +299,14 @@ public void setAssetSubType(String assetSubType) {
298299
this.assetSubType = assetSubType;
299300
}
300301

302+
public Integer getBaseType() {
303+
return baseType;
304+
}
305+
306+
public void setBaseType(final Integer baseType) {
307+
this.baseType = baseType;
308+
}
309+
301310
@Override
302311
public String toString() {
303312
return "Identifier [id=" + id + ", assetName=" + assetName + ", assetType=" + assetType + ", parentPath=" + parentPath

dotCMS/src/main/java/com/dotmarketing/beans/transform/IdentifierTransformer.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import static com.dotmarketing.business.IdentifierFactory.ASSET_NAME;
1313
import static com.dotmarketing.business.IdentifierFactory.ASSET_SUBTYPE;
1414
import static com.dotmarketing.business.IdentifierFactory.ASSET_TYPE;
15+
import static com.dotmarketing.business.IdentifierFactory.BASE_TYPE;
1516
import static com.dotmarketing.business.IdentifierFactory.CREATE_DATE;
1617
import static com.dotmarketing.business.IdentifierFactory.HOST_INODE;
1718
import static com.dotmarketing.business.IdentifierFactory.ID;
@@ -60,6 +61,10 @@ private static Identifier transform(final Map<String, Object> map) {
6061
identifier.setOwner((String) map.get(OWNER));
6162
identifier.setCreateDate((Date) map.get(CREATE_DATE));
6263
identifier.setAssetSubType((String) map.get(ASSET_SUBTYPE));
64+
final Object baseTypeObj = map.get(BASE_TYPE);
65+
if (baseTypeObj != null) {
66+
identifier.setBaseType(((Number) baseTypeObj).intValue());
67+
}
6368
return identifier;
6469
}
6570

dotCMS/src/main/java/com/dotmarketing/business/IdentifierFactory.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ public abstract class IdentifierFactory {
3030
public static final String OWNER = "owner";
3131
public static final String CREATE_DATE = "create_date";
3232
public static final String ASSET_SUBTYPE = "asset_subtype";
33+
public static final String BASE_TYPE = "base_type";
3334

3435
/**
3536
* Retrieves all identifiers matching a URI pattern.

dotCMS/src/main/java/com/dotmarketing/business/IdentifierFactoryImpl.java

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ protected Identifier createNewIdentifier(final Versionable versionable, final Fo
358358
identifier.setParentPath(folder.getPath());
359359
identifier.setAssetName(uri);
360360
identifier.setAssetSubType(contentlet.getContentType().variable());
361+
identifier.setBaseType(contentlet.getContentType().baseType().getType());
361362
} else if (versionable instanceof WebAsset) {
362363
identifier.setURI(((WebAsset) versionable).getURI(folder));
363364
identifier.setAssetType(versionable.getVersionType());
@@ -440,6 +441,7 @@ protected Identifier createNewIdentifier ( Versionable versionable, Host site, S
440441
identifier.setParentPath("/");
441442
identifier.setAssetName(uri);
442443
identifier.setAssetSubType(cont.getContentType().variable());
444+
identifier.setBaseType(cont.getContentType().baseType().getType());
443445
} else if (versionable instanceof Link) {
444446
identifier.setAssetName(versionable.getInode());
445447
identifier.setParentPath("/");
@@ -448,6 +450,7 @@ protected Identifier createNewIdentifier ( Versionable versionable, Host site, S
448450
identifier.setAssetType(Identifier.ASSET_TYPE_CONTENTLET);
449451
identifier.setParentPath("/");
450452
identifier.setAssetSubType(Host.HOST_VELOCITY_VAR_NAME);
453+
identifier.setBaseType(((Contentlet) versionable).getContentType().baseType().getType());
451454
} else {
452455
identifier.setURI(uri);
453456
}
@@ -517,13 +520,30 @@ protected Identifier saveIdentifier(final Identifier id) throws DotDataException
517520
String query;
518521
if (UtilMethods.isSet(id.getId())) {
519522
if (isIdentifier(id.getId())) {
520-
query = "UPDATE identifier set parent_path=?, asset_name=?, host_inode=?, asset_type=?, syspublish_date=?, sysexpire_date=?, owner=?, create_date=?, asset_subtype=? where id=?";
523+
query = "UPDATE identifier set parent_path=?, asset_name=?, host_inode=?, asset_type=?, syspublish_date=?, sysexpire_date=?, owner=?, create_date=?, asset_subtype=?, base_type=? where id=?";
524+
// Guard against stale cached Identifiers (loaded before base_type was
525+
// denormalized) writing null back and temporarily undoing the backfill.
526+
// Only needed on UPDATE — INSERT callers must always supply base_type.
527+
if (id.getBaseType() == null && UtilMethods.isSet(id.getAssetSubType())) {
528+
final Integer resolvedBaseType = Try.of(() ->
529+
APILocator.getContentTypeAPI(APILocator.systemUser())
530+
.find(id.getAssetSubType())
531+
.baseType()
532+
.getType())
533+
.onFailure(e -> Logger.warn(IdentifierFactoryImpl.class,
534+
"Could not resolve base_type for asset_subtype=" + id.getAssetSubType()
535+
+ " on identifier=" + id.getId() + ": " + e.getMessage()))
536+
.getOrNull();
537+
if (resolvedBaseType != null) {
538+
id.setBaseType(resolvedBaseType);
539+
}
540+
}
521541
} else {
522-
query = "INSERT INTO identifier (parent_path,asset_name,host_inode,asset_type,syspublish_date,sysexpire_date,owner,create_date,asset_subtype,id) values (?,?,?,?,?,?,?,?,?,?)";
542+
query = "INSERT INTO identifier (parent_path,asset_name,host_inode,asset_type,syspublish_date,sysexpire_date,owner,create_date,asset_subtype,base_type,id) values (?,?,?,?,?,?,?,?,?,?,?)";
523543
}
524544
} else {
525545
id.setId(UUIDGenerator.generateUuid());
526-
query = "INSERT INTO identifier (parent_path,asset_name,host_inode,asset_type,syspublish_date,sysexpire_date,owner,create_date,asset_subtype,id) values (?,?,?,?,?,?,?,?,?,?)";
546+
query = "INSERT INTO identifier (parent_path,asset_name,host_inode,asset_type,syspublish_date,sysexpire_date,owner,create_date,asset_subtype,base_type,id) values (?,?,?,?,?,?,?,?,?,?,?)";
527547
}
528548

529549
DotConnect dc = new DotConnect();
@@ -539,6 +559,7 @@ protected Identifier saveIdentifier(final Identifier id) throws DotDataException
539559
dc.addParam(id.getOwner());
540560
dc.addParam(id.getCreateDate());
541561
dc.addParam(id.getAssetSubType());
562+
dc.addParam(id.getBaseType());
542563
dc.addParam(id.getId());
543564

544565

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
package com.dotmarketing.quartz.job;
2+
3+
import com.dotmarketing.common.db.DotConnect;
4+
import com.dotmarketing.exception.DotDataException;
5+
import com.dotmarketing.exception.DotRuntimeException;
6+
import com.dotmarketing.quartz.DotStatefulJob;
7+
import com.dotmarketing.quartz.QuartzUtils;
8+
import com.dotmarketing.util.Config;
9+
import com.dotmarketing.util.Logger;
10+
import com.google.common.annotations.VisibleForTesting;
11+
import io.vavr.Lazy;
12+
import io.vavr.control.Try;
13+
import org.quartz.JobDataMap;
14+
import org.quartz.JobDetail;
15+
import org.quartz.JobExecutionContext;
16+
import org.quartz.JobExecutionException;
17+
import org.quartz.SchedulerException;
18+
import org.quartz.SimpleTrigger;
19+
import org.quartz.TriggerUtils;
20+
21+
import java.util.Date;
22+
23+
import java.util.List;
24+
25+
/**
26+
* Quartz job that populates the {@code base_type} column on the {@code identifier} table in the
27+
* background. Scheduled by the startup task {@code Task260331AddBaseTypeColumnToIdentifier} so
28+
* that large customer databases are not blocked during startup.
29+
*
30+
* <p>The job removes itself from the scheduler once population completes — including when it
31+
* detects that no rows need updating (migration already finished).
32+
*/
33+
public class PopulateIdentifierBaseTypeJob extends DotStatefulJob {
34+
35+
private static final String CONFIG_PROPERTY_HOURS_INTERVAL =
36+
"populateIdentifierBaseTypeJob.hours.interval";
37+
private static final Lazy<Integer> HOURS_INTERVAL = Lazy.of(
38+
() -> Config.getIntProperty(CONFIG_PROPERTY_HOURS_INTERVAL, 4));
39+
40+
/**
41+
* Counts only rows that can actually be resolved by the populate query — i.e. rows with a
42+
* matching entry in the structure table. Orphaned identifiers whose asset_subtype no longer
43+
* exists in structure are intentionally excluded: the populate loop returns 0 for them and
44+
* they must not keep the job alive indefinitely.
45+
*/
46+
private static final String PENDING_ROWS_QUERY =
47+
"SELECT 1 FROM identifier i " +
48+
"INNER JOIN structure s ON s.velocity_var_name = i.asset_subtype " +
49+
"WHERE i.base_type IS NULL LIMIT 1";
50+
51+
@Override
52+
public void run(final JobExecutionContext jobContext) throws JobExecutionException {
53+
Logger.info(this, "PopulateIdentifierBaseTypeJob: starting");
54+
try {
55+
if (!hasPendingRows()) {
56+
Logger.info(this, "PopulateIdentifierBaseTypeJob: no rows with null base_type — migration already complete, unscheduling");
57+
removeJob();
58+
return;
59+
}
60+
61+
final int updated = new PopulateIdentifierBaseTypeUtil().populate();
62+
Logger.info(this, "PopulateIdentifierBaseTypeJob: populate() returned — " + updated + " rows updated");
63+
64+
// Only unschedule when the migration is confirmed complete.
65+
// populate() may return early due to interruption, leaving pending rows;
66+
// in that case we keep the job scheduled so it resumes on the next trigger.
67+
if (!hasPendingRows()) {
68+
Logger.info(this, "PopulateIdentifierBaseTypeJob: migration complete, unscheduling");
69+
removeJob();
70+
} else {
71+
Logger.info(this, "PopulateIdentifierBaseTypeJob: pending rows remain (interrupted or partial run) — will resume on next trigger");
72+
}
73+
} catch (final SchedulerException e) {
74+
Logger.error(this, "PopulateIdentifierBaseTypeJob: unable to remove job", e);
75+
throw new DotRuntimeException(e);
76+
}
77+
}
78+
79+
/**
80+
* Schedules the job. Skips scheduling if there are no rows with a null {@code base_type},
81+
* meaning the migration has already been completed. Safe to call multiple times — a no-op if
82+
* the job is already scheduled.
83+
*/
84+
public static void fireJob() {
85+
if (!hasPendingRows()) {
86+
Logger.info(PopulateIdentifierBaseTypeJob.class,
87+
"PopulateIdentifierBaseTypeJob: no rows with null base_type — migration already complete, skipping scheduling");
88+
return;
89+
}
90+
91+
final String jobName = getJobName();
92+
final String groupName = getJobGroupName();
93+
final var scheduler = QuartzUtils.getScheduler();
94+
95+
final var jobDetail = Try.of(() -> scheduler.getJobDetail(jobName, groupName))
96+
.onFailure(e -> Logger.error(PopulateIdentifierBaseTypeJob.class,
97+
String.format("Error retrieving job detail [%s, %s]", jobName, groupName), e))
98+
.getOrNull();
99+
100+
if (jobDetail != null) {
101+
Logger.info(PopulateIdentifierBaseTypeJob.class,
102+
String.format("Job [%s, %s] already scheduled — skipping", jobName, groupName));
103+
return;
104+
}
105+
106+
final var newJobDetail = new JobDetail(jobName, groupName, PopulateIdentifierBaseTypeJob.class);
107+
newJobDetail.setJobDataMap(new JobDataMap());
108+
newJobDetail.setDurability(false);
109+
newJobDetail.setVolatility(false);
110+
newJobDetail.setRequestsRecovery(true);
111+
112+
final var trigger = TriggerUtils.makeHourlyTrigger(HOURS_INTERVAL.get());
113+
trigger.setName(jobName);
114+
trigger.setGroup(groupName);
115+
trigger.setJobName(jobName);
116+
trigger.setJobGroup(groupName);
117+
trigger.setStartTime(new Date()); // fire immediately on first schedule
118+
trigger.setMisfireInstruction(SimpleTrigger.MISFIRE_INSTRUCTION_FIRE_NOW);
119+
120+
try {
121+
scheduler.scheduleJob(newJobDetail, trigger);
122+
Logger.info(PopulateIdentifierBaseTypeJob.class,
123+
"PopulateIdentifierBaseTypeJob scheduled successfully");
124+
} catch (final Exception e) {
125+
throw new DotRuntimeException("Error scheduling PopulateIdentifierBaseTypeJob", e);
126+
}
127+
}
128+
129+
/**
130+
* Returns {@code true} if any {@code identifier} rows still have a null {@code base_type}
131+
* and have a matching entry in the {@code structure} table — i.e., there is migration work
132+
* that the populate query can actually complete. Orphaned rows (asset_subtype with no
133+
* matching structure entry) are excluded so they cannot keep the job running indefinitely.
134+
*/
135+
public static boolean hasPendingRows() {
136+
try {
137+
final List<?> result = new DotConnect()
138+
.setSQL(PENDING_ROWS_QUERY)
139+
.loadObjectResults();
140+
return !result.isEmpty();
141+
} catch (final DotDataException e) {
142+
Logger.error(PopulateIdentifierBaseTypeJob.class,
143+
"Error checking for pending base_type rows: " + e.getMessage(), e);
144+
// Err on the side of running — better to do unnecessary work than skip needed work
145+
return true;
146+
}
147+
}
148+
149+
@VisibleForTesting
150+
static void removeJob() throws SchedulerException {
151+
QuartzUtils.removeJob(getJobName(), getJobGroupName());
152+
}
153+
154+
@VisibleForTesting
155+
static String getJobName() {
156+
return PopulateIdentifierBaseTypeJob.class.getSimpleName();
157+
}
158+
159+
@VisibleForTesting
160+
static String getJobGroupName() {
161+
return getJobName() + "_Group";
162+
}
163+
164+
}

0 commit comments

Comments
 (0)